def test_dequantize_int8_to_float32():
    shape = rand_shape_nd(4)
    qdata_np = np.random.uniform(low=-127, high=127, size=shape).astype(dtype=np.int8)
    qdata = mx.nd.array(qdata_np, dtype=np.int8)
    real_range = 402.3347
    min_range = mx.nd.array([-real_range], dtype=np.float32)
    max_range = mx.nd.array([real_range], dtype=np.float32)
    data = mx.nd.contrib.dequantize(qdata, min_range, max_range, out_type='float32')
    quantized_range = 127.0
    scale = real_range / quantized_range
    assert data.dtype == np.float32
    data_np = qdata_np * scale
    assert_almost_equal(data.asnumpy(), data_np)
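
A quick NumPy-only sketch of the mapping this test checks (illustrative values, not the library's implementation): each int8 step corresponds to real_range / 127 float32 units, so the int8 endpoints map back to +/- real_range.

import numpy as np

qdata = np.array([-127, -64, 0, 64, 127], dtype=np.int8)
real_range = 402.3347
scale = real_range / 127.0               # float32 units per int8 step
data = qdata.astype(np.float32) * scale  # endpoints recover +/- real_range
print(data)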
Example #2
def test_sin():
    def sin(x):
        return nd.sin(x)

    def grad_grad_op(x):
        return -nd.sin(x)

    def grad_grad_grad_op(x):
        return -nd.cos(x)

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, sin, grad_grad_op)
        # TODO(kshitij12345): Remove
        check_nth_order_unary(array, sin, [grad_grad_op, grad_grad_grad_op],
                              [2, 3])
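
check_second_order_unary and check_nth_order_unary come from MXNet's higher-order-gradient test helpers; a minimal sketch of the autograd pattern they presumably wrap, assuming the standard mx.autograd API:

from mxnet import nd, autograd

x = nd.array([0.5, 1.0, 1.5])
x.attach_grad()
with autograd.record():
    y = nd.sin(x)
    # create_graph=True keeps the first gradient itself differentiable
    dy_dx = autograd.grad(y, x, create_graph=True, retain_graph=True)[0]
dy_dx.backward()  # differentiates cos(x) a second time
print(x.grad)     # approximately -sin(x), matching grad_grad_op above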
Example #3
def test_arccosh():
    def arccosh(x):
        return nd.arccosh(x)

    def grad_grad_op(x):
        # second derivative of arccosh: -x / (x^2 - 1)^(3/2)
        return -x / (nd.sqrt(x - 1) * nd.sqrt(x + 1) * (x + 1) * (x - 1))

    sigma = random.randint(25, 100)
    mu = random.randint(500, 1000)

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        array = array * sigma + mu
        # Domain of arccosh is [1, inf); scaling by sigma and shifting by mu
        # pushes the samples into it, and the assert guards against outliers.
        assert (array > 1).all()
        check_second_order_unary(array, arccosh, grad_grad_op)
Example #4
def test_ones():
    # test np.ones in Gluon
    class TestOnes(HybridBlock):
        def __init__(self, shape, dtype=None):
            super(TestOnes, self).__init__()
            self._shape = shape
            self._dtype = dtype

        def hybrid_forward(self, F, x, *args, **kwargs):
            return x * F.np.ones(self._shape, self._dtype)

    class TestOnesOutputType(HybridBlock):
        def hybrid_forward(self, F, x, *args, **kwargs):
            return x, F.np.ones(shape=())

    # test np.ones in imperative
    def check_ones_array_creation(shape, dtype):
        np_out = _np.ones(shape=shape, dtype=dtype)
        mx_out = np.ones(shape=shape, dtype=dtype)
        assert same(mx_out.asnumpy(), np_out)
        if dtype is None:
            assert mx_out.dtype == _np.float32
            assert np_out.dtype == _np.float64

    shapes = [(0, ), (2, 0, 2), (0, 0, 0, 0), ()]
    shapes += [rand_shape_nd(ndim, allow_zero_size=True) for ndim in range(5)]
    dtypes = [_np.int8, _np.int32, _np.float16, _np.float32, _np.float64, None]
    for shape in shapes:
        for dtype in dtypes:
            check_ones_array_creation(shape, dtype)
            x = mx.nd.array(_np.random.uniform(size=shape),
                            dtype=dtype).as_np_ndarray()
            if dtype is None:
                x = x.astype('float32')
            for hybridize in [True, False]:
                test_ones = TestOnes(shape, dtype)
                test_ones_output_type = TestOnesOutputType()
                if hybridize:
                    test_ones.hybridize()
                    test_ones_output_type.hybridize()
                y = test_ones(x)
                assert type(y) == np.ndarray
                assert same(x.asnumpy(), y.asnumpy())
                y = test_ones_output_type(x)
                assert type(y[1]) == np.ndarray
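
The dtype branch in check_ones_array_creation hinges on a default-dtype difference: MXNet's NumPy-compatible module defaults to float32, while official NumPy defaults to float64. A quick imperative check, assuming the same `from mxnet import np` / `import numpy as _np` aliases these tests use:

import numpy as _np
from mxnet import np   # MXNet's NumPy-compatible module

print(np.ones(()).dtype)    # float32
print(_np.ones(()).dtype)   # float64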
Example #5
def test_tanh():
    def tanh(x):
        return nd.tanh(x)

    def grad_op(x):
        return 1 / nd.cosh(x)**2

    def grad_grad_op(x):
        return -2 * tanh(x) * grad_op(x)

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array,
                                 tanh,
                                 grad_grad_op,
                                 rtol=1e-6,
                                 atol=1e-6)
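
grad_op above is sech^2(x); the identity sech^2(x) = 1 - tanh^2(x) gives an equivalent form, and differentiating it yields the grad_grad_op used here. A quick NumPy check of both facts:

import numpy as np

x = np.linspace(-2.0, 2.0, 9)
sech2 = 1.0 / np.cosh(x) ** 2
assert np.allclose(sech2, 1.0 - np.tanh(x) ** 2)  # sech^2 = 1 - tanh^2
# d/dx sech^2(x) = -2 tanh(x) sech^2(x), i.e. grad_grad_op above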
Example #6
def test_array_creation():
    dtypes = [_np.int8, _np.int32, _np.float16, _np.float32, _np.float64, None]
    objects = [[], (), [[1, 2], [3, 4]],
               _np.random.uniform(size=rand_shape_nd(3)),
               _np.random.uniform(size=(3, 0, 4))]
    for dtype in dtypes:
        for src in objects:
            mx_arr = np.array(src, dtype=dtype)
            assert mx_arr.context == mx.current_context()
            if isinstance(src, mx.nd.NDArray):
                np_arr = _np.array(
                    src.asnumpy(),
                    dtype=dtype if dtype is not None else _np.float32)
            else:
                np_arr = _np.array(
                    src, dtype=dtype if dtype is not None else _np.float32)
            assert mx_arr.dtype == np_arr.dtype
            assert same(mx_arr.asnumpy(), np_arr)
Example #7
def test_quantize_float32_to_int8():
    shape = rand_shape_nd(4)
    data = rand_ndarray(shape, 'default', dtype='float32')
    min_range = mx.nd.min(data)
    max_range = mx.nd.max(data)
    qdata, min_val, max_val = mx.nd.contrib.quantize(data, min_range, max_range, out_type='int8')
    data_np = data.asnumpy()
    min_range = min_range.asscalar()
    max_range = max_range.asscalar()
    real_range = np.maximum(np.abs(min_range), np.abs(max_range))
    quantized_range = 127.0
    scale = quantized_range / real_range
    assert qdata.dtype == np.int8
    assert min_val.dtype == np.float32
    assert max_val.dtype == np.float32
    assert same(min_val.asscalar(), -real_range)
    assert same(max_val.asscalar(), real_range)
    qdata_np = (np.sign(data_np) * np.minimum(np.abs(data_np) * scale + 0.5, quantized_range)).astype(np.int8)
    assert_almost_equal(qdata.asnumpy(), qdata_np, atol=1)
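
The qdata_np reference line implements round-half-away-from-zero with saturation at +/-127; a hand-checked NumPy walk-through with illustrative values:

import numpy as np

data = np.array([-1.0, 0.2, 0.5, 1.0], dtype=np.float32)
real_range = max(abs(data.min()), abs(data.max()))   # 1.0
scale = 127.0 / real_range
q = (np.sign(data) * np.minimum(np.abs(data) * scale + 0.5,
                                127.0)).astype(np.int8)
print(q)  # [-127 25 64 127]: 0.5 rounds away from zero, +/-1.0 saturate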
Example #8
def test_np_get_dtype():
    dtypes = [_np.int8, _np.int32, _np.float16, _np.float32, _np.float64, _np.bool, _np.bool_,
              'int8', 'int32', 'float16', 'float32', 'float64', 'bool', None]
    objects = [
        [],
        (),
        [[1, 2], [3, 4]],
        _np.random.uniform(size=rand_shape_nd(3)),
        _np.random.uniform(size=(3, 0, 4))
    ]
    for dtype in dtypes:
        for src in objects:
            mx_arr = np.array(src, dtype=dtype)
            assert mx_arr.ctx == mx.current_context()
            if isinstance(src, mx.nd.NDArray):
                np_arr = _np.array(src.asnumpy(), dtype=dtype if dtype is not None else _np.float32)
            else:
                np_arr = _np.array(src, dtype=dtype if dtype is not None else _np.float32)
            assert type(mx_arr.dtype) == type(np_arr.dtype)
Example #9
def test_quantize_float32_to_int8():
    shape = rand_shape_nd(4)
    data = rand_ndarray(shape, 'default', dtype='float32')
    min_range = mx.nd.min(data)
    max_range = mx.nd.max(data)
    qdata, min_val, max_val = mx.nd.contrib.quantize(data, min_range, max_range, out_type='int8')
    data_np = data.asnumpy()
    min_range = min_range.asscalar()
    max_range = max_range.asscalar()
    real_range = np.maximum(np.abs(min_range), np.abs(max_range))
    quantized_range = 127.0
    scale = quantized_range / real_range
    assert qdata.dtype == np.int8
    assert min_val.dtype == np.float32
    assert max_val.dtype == np.float32
    assert same(min_val.asscalar(), -real_range)
    assert same(max_val.asscalar(), real_range)
    qdata_np = (np.sign(data_np) * np.minimum(np.abs(data_np) * scale + 0.5, quantized_range)).astype(np.int8)
    assert same(qdata.asnumpy(), qdata_np)
Example #10
def test_bf16_concat():
    dshape = rand_shape_nd(4)
    a_shape = tuple(dshape)
    b_shape = tuple(dshape)

    a_sym_fp32 = mx.sym.Variable("data", shape=a_shape)
    b_sym_fp32 = mx.sym.Variable("data_1", shape=b_shape)

    a_sym_bf16 = mx.sym.Variable("data", dtype=bfloat16, shape=a_shape)
    b_sym_bf16 = mx.sym.Variable("data_1", dtype=bfloat16, shape=b_shape)
    for axis in range(0, 4):
        print(axis, a_shape)
        concat_sym_fp32 = mx.sym.concat(a_sym_fp32, b_sym_fp32, dim=axis)
        concat_sym_bf16 = mx.sym.concat(a_sym_bf16, b_sym_bf16, dim=axis)

        check_operator_accuracy(concat_sym_fp32,
                                concat_sym_bf16,
                                dshape,
                                num_input_data=2,
                                bf16_use_fp32_params=True)
Example #11
def test_bf16_concat():
    dshape = rand_shape_nd(4)
    a_shape = tuple(dshape)
    b_shape = tuple(dshape)

    a_sym_fp32 = mx.sym.Variable("data")
    b_sym_fp32 = mx.sym.Variable("data_1")

    a_sym_bf16 = mx.sym.Variable("data", dtype='bfloat16')
    b_sym_bf16 = mx.sym.Variable("data_1", dtype='bfloat16')
    for axis in range(0, 4):
        concat_sym_fp32 = mx.sym.concat(a_sym_fp32, b_sym_fp32, dim=axis)
        concat_sym_bf16 = mx.sym.concat(a_sym_bf16, b_sym_bf16, dim=axis)

        dshapes = {'data': a_shape, 'data_1': b_shape}
        check_operator_accuracy(concat_sym_fp32,
                                concat_sym_bf16,
                                dshapes,
                                num_input_data=2,
                                bf16_use_fp32_params=True)
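
bfloat16 keeps float32's 8-bit exponent but only 7 explicit mantissa bits, which is why these concat tests compare against fp32 results rather than exact values. A NumPy sketch of the precision loss (truncation shown for simplicity; real conversions typically round):

import numpy as np

def bf16_trunc(x):
    # keep the top 16 bits of the float32 pattern: sign, exponent, 7 mantissa bits
    bits = np.asarray(x, dtype=np.float32).view(np.uint32)
    return (bits & np.uint32(0xFFFF0000)).view(np.float32)

x = np.array([1.0, 3.14159265, 1e-3], dtype=np.float32)
print(bf16_trunc(x))  # [1.0, 3.140625, ~0.000999451]: about 3 decimal digits survive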
Example #12
def test_bf16_binary_broadcast_elemwise_mixed_input(function, dtype):
    ndim = np.random.randint(1, 6)
    dshape_0 = rand_shape_nd(ndim)
    dshape_1 = tuple()
    for i in range(ndim):
        if randint(0, 1):
            dshape_1 += (dshape_0[i], )
        else:
            dshape_1 += (1, )

    a = mx.np.random.uniform(-1, 1, dshape_0, dtype=np.float32)
    a_fp32 = mx.np.array(a, dtype=dtype)
    a_bf16 = a.astype('bfloat16')

    b = mx.np.random.uniform(-1, 1, dshape_1, dtype=np.float32)
    b_fp32 = mx.np.array(b, dtype=dtype)
    b_bf16 = b.astype('bfloat16')

    rtol = 1e-1
    atol = 5e-1
    etol = 0

    out_bf_16_1 = function(a_bf16, b_fp32)
    out_fp_32 = function(a_fp32, b_fp32)
    assert_almost_equal_with_err(out_bf_16_1,
                                 out_fp_32,
                                 rtol=rtol,
                                 atol=atol,
                                 etol=etol)

    out_bf_16_2 = function(a_fp32, b_bf16)
    assert_almost_equal_with_err(out_bf_16_2,
                                 out_fp_32,
                                 rtol=rtol,
                                 atol=atol,
                                 etol=etol)
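
The shape loop above makes dshape_1 broadcast-compatible with dshape_0 by construction: every axis either matches or is 1. A compact equivalent (np.broadcast_shapes requires NumPy >= 1.20):

import numpy as np
from random import randint

dshape_0 = (3, 1, 5)
dshape_1 = tuple(d if randint(0, 1) else 1 for d in dshape_0)
print(np.broadcast_shapes(dshape_0, dshape_1))  # always succeeds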
Example #13
def test_eliminate_common_expr():
    if not sys.platform.startswith('linux'):
        logging.info(
            "Bypass the CSE test on non-Linux OS as setting env variables during test does not work on Windows"
        )
        return

    def set_back_env_var(var_name, old_env_var):
        if old_env_var is None:
            os.environ.pop(var_name)
        else:
            os.environ[var_name] = old_env_var

    # helper function to test a single model
    def check_cse_on_symbol(sym, expected_savings, check_data, **kwargs):
        inputs = sym.list_inputs()
        shapes = {inp: kwargs[inp].shape for inp in inputs}
        rtol = {
            'float16': 1e-2,
            'float32': 1.5e-6,
            'float64': 1.5e-6,
        }
        atol = {
            'float16': 1e-3,
            'float32': 1e-7,
            'float64': 1e-7,
        }
        env_var_name = 'MXNET_ELIMINATE_COMMON_EXPR'
        old_env_var = os.environ.get(env_var_name, None)
        try:
            for dtype in ['float16', 'float32', 'float64']:
                data = {inp: kwargs[inp].astype(dtype) for inp in inputs}
                for grad_req in ['write', 'add']:
                    type_dict = {inp: dtype for inp in inputs}
                    os.environ[env_var_name] = '0'
                    orig_exec = sym.simple_bind(ctx=mx.cpu(0),
                                                grad_req=grad_req,
                                                type_dict=type_dict,
                                                **shapes)
                    os.environ[env_var_name] = '1'
                    cse_exec = sym.simple_bind(ctx=mx.cpu(0),
                                               grad_req=grad_req,
                                               type_dict=type_dict,
                                               **shapes)
                    fwd_orig = orig_exec.forward(is_train=True, **data)
                    out_grads = [mx.nd.ones_like(arr) for arr in fwd_orig]
                    orig_exec.backward(out_grads=out_grads)
                    fwd_cse = cse_exec.forward(is_train=True, **data)
                    cse_exec.backward(out_grads=out_grads)
                    if check_data:
                        for orig, cse in zip(fwd_orig, fwd_cse):
                            np.testing.assert_allclose(orig.asnumpy(),
                                                       cse.asnumpy(),
                                                       rtol=rtol[dtype],
                                                       atol=atol[dtype])
                        for orig, cse in zip(orig_exec.grad_arrays,
                                             cse_exec.grad_arrays):
                            if orig is None and cse is None:
                                continue
                            assert orig is not None
                            assert cse is not None
                            np.testing.assert_allclose(orig.asnumpy(),
                                                       cse.asnumpy(),
                                                       rtol=rtol[dtype],
                                                       atol=atol[dtype])
                    orig_sym_internals = orig_exec.get_optimized_symbol(
                    ).get_internals()
                    cse_sym_internals = cse_exec.get_optimized_symbol(
                    ).get_internals()
                    # test that the graph has been simplified as expected
                    assert (len(cse_sym_internals) +
                            expected_savings) == len(orig_sym_internals)
        finally:
            set_back_env_var(env_var_name, old_env_var)

    a = mx.sym.Variable('a')
    b = mx.sym.Variable('b')
    c = mx.sym.Variable('c')
    shape = rand_shape_nd(2)
    arr1 = mx.random.uniform(shape=shape)
    arr2 = mx.random.uniform(shape=shape)
    arr3 = mx.random.uniform(shape=shape)

    check_cse_on_symbol((a + 5) + (a + 5),
                        expected_savings=1,
                        check_data=True,
                        a=arr1,
                        b=arr2)
    check_cse_on_symbol((a + 1) + (a + 2),
                        expected_savings=0,
                        check_data=True,
                        a=arr1,
                        b=arr2)
    check_cse_on_symbol((1 + a) + (a + 1),
                        expected_savings=1,
                        check_data=True,
                        a=arr1,
                        b=arr2)
    check_cse_on_symbol((a + b) + (a + b),
                        expected_savings=1,
                        check_data=True,
                        a=arr1,
                        b=arr2)
    check_cse_on_symbol(((a + b) + c) + ((a + b) + c),
                        expected_savings=2,
                        check_data=True,
                        a=arr1,
                        b=arr2,
                        c=arr3)
    d = a + 1

    # a*d node gets eliminated, but then a copy is inserted to isolate the outputs, so no net gain.
    check_cse_on_symbol(mx.sym.Group([a * d, a * d]),
                        expected_savings=0,
                        check_data=True,
                        a=arr1)

    # a*d node gets eliminated, then the duplicated add-of-b, but then a copy is added for net of 1.
    check_cse_on_symbol(mx.sym.Group([a * d + b, a * d + b]),
                        expected_savings=1,
                        check_data=True,
                        a=arr1,
                        b=arr2)

    # dropout uses a resource that precludes any optimization
    check_cse_on_symbol(mx.sym.Dropout(a) + mx.sym.Dropout(a),
                        expected_savings=0,
                        check_data=False,
                        a=arr1)
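
The try/finally plus set_back_env_var dance above is what the environment() context manager in the second test_eliminate_common_expr listing below replaces. A minimal stand-in, assuming the dict-of-variables form used there:

import os
from contextlib import contextmanager

@contextmanager
def environment(mapping):
    # set env vars for the duration of a block, then restore the old values
    old = {k: os.environ.get(k) for k in mapping}
    os.environ.update(mapping)
    try:
        yield
    finally:
        for k, v in old.items():
            if v is None:
                os.environ.pop(k, None)
            else:
                os.environ[k] = v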
Example #14
def test_np_prod():
    class TestProd(HybridBlock):
        def __init__(self, axis=None, dtype=None, keepdims=False):
            super(TestProd, self).__init__()
            self._axis = axis
            self._dtype = dtype
            self._keepdims = keepdims

        def hybrid_forward(self, F, a, *args, **kwargs):
            return F.np.prod(a,
                             axis=self._axis,
                             dtype=self._dtype,
                             keepdims=self._keepdims)

    in_data_dim = random.choice([3, 4])
    shape = rand_shape_nd(in_data_dim, dim=3)
    for hybridize in [False, True]:
        for keepdims in [True, False]:
            for axis in ([i for i in range(in_data_dim)] + [(), None]):
                for itype in ['float32', 'float64']:
                    for dtype in ['float32', 'float64']:
                        # test gluon
                        test_prod = TestProd(axis=axis,
                                             dtype=dtype,
                                             keepdims=keepdims)
                        if hybridize:
                            test_prod.hybridize()
                        x = np.array(_np.random.uniform(-2.0, 2.0, size=shape),
                                     dtype=itype)
                        x.attach_grad()
                        print(x.grad.dtype)
                        expected_ret = _np.prod(x.asnumpy(),
                                                axis=axis,
                                                keepdims=keepdims)
                        expected_ret = expected_ret.astype(dtype)
                        with mx.autograd.record():
                            y = test_prod(x)
                        assert y.shape == expected_ret.shape
                        assert_almost_equal(y.asnumpy(),
                                            expected_ret,
                                            rtol=1e-3,
                                            atol=1e-5,
                                            use_broadcast=False)
                        y.backward()
                        # use keepdims=True so that broadcast divide can be used to calculate
                        # grad of input
                        expected_ret = _np.prod(x.asnumpy(),
                                                axis=axis,
                                                keepdims=True)
                        assert_almost_equal(x.grad.asnumpy(),
                                            expected_ret / x.asnumpy(),
                                            rtol=1e-3,
                                            atol=1e-3,
                                            use_broadcast=False)

                        # test numeric
                        if itype == 'float32' and dtype == 'float32':
                            x_sym = mx.sym.Variable("x").as_np_ndarray()
                            mx_sym = mx.sym.np.prod(
                                x_sym,
                                axis=axis,
                                dtype=dtype,
                                keepdims=keepdims).as_nd_ndarray()
                            check_numeric_gradient(mx_sym, [x.as_nd_ndarray()],
                                                   numeric_eps=1e-3,
                                                   rtol=1e-3,
                                                   atol=1e-4,
                                                   dtype=_np.float32)

                        # test imperative
                        mx_out = np.prod(x,
                                         axis=axis,
                                         dtype=dtype,
                                         keepdims=keepdims)
                        np_out = _np.prod(x.asnumpy(),
                                          axis=axis,
                                          keepdims=keepdims).astype(dtype)
                        assert_almost_equal(mx_out.asnumpy(),
                                            np_out,
                                            rtol=1e-3,
                                            atol=1e-5,
                                            use_broadcast=False)
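
The backward check in test_np_prod relies on d prod(x) / dx_i = prod(x) / x_i (valid when no element is zero), which keepdims=True lets NumPy broadcast across the reduced axes. A two-line sanity check:

import numpy as _np

x = _np.array([2.0, 3.0, 4.0])
grad = _np.prod(x, keepdims=True) / x        # prod = 24 -> grad = [12, 8, 6]
assert _np.allclose(grad, [12.0, 8.0, 6.0])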
Example #15
def test_eliminate_common_expr():
    # helper function to test a single model
    def check_cse_on_symbol(sym, expected_savings, check_data, **kwargs):
        inputs = sym.list_inputs()
        shapes = {inp : kwargs[inp].shape for inp in inputs}
        rtol = {'float16' : 1e-2,
                'float32' : 1.5e-6,
                'float64' : 1.5e-6,
                }
        atol = {'float16' : 1e-3,
                'float32' : 1e-7,
                'float64' : 1e-7,
                }
        for dtype in ['float16', 'float32', 'float64']:
            data = {inp : kwargs[inp].astype(dtype) for inp in inputs}
            for grad_req in ['write', 'add']:
                type_dict = {inp : dtype for inp in inputs}
                with environment({'MXNET_ELIMINATE_COMMON_EXPR': '0'}):
                    orig_exec = sym._simple_bind(ctx=mx.cpu(0), grad_req=grad_req,
                                                type_dict=type_dict, **shapes)
                with environment({'MXNET_ELIMINATE_COMMON_EXPR': '1'}):
                    cse_exec = sym._simple_bind(ctx=mx.cpu(0), grad_req=grad_req,
                                               type_dict=type_dict, **shapes)
                fwd_orig = orig_exec.forward(is_train=True, **data)
                out_grads = [mx.nd.ones_like(arr) for arr in fwd_orig]
                orig_exec.backward(out_grads=out_grads)
                fwd_cse = cse_exec.forward(is_train=True, **data)
                cse_exec.backward(out_grads=out_grads)
                if check_data:
                    for orig, cse in zip(fwd_orig, fwd_cse):
                        np.testing.assert_allclose(orig.asnumpy(), cse.asnumpy(),
                                                   rtol=rtol[dtype], atol=atol[dtype])
                    for orig, cse in zip(orig_exec.grad_arrays, cse_exec.grad_arrays):
                        if orig is None and cse is None:
                            continue
                        assert orig is not None
                        assert cse is not None
                        np.testing.assert_allclose(orig.asnumpy(), cse.asnumpy(),
                                                   rtol=rtol[dtype], atol=atol[dtype])
                orig_sym_internals = orig_exec.get_optimized_symbol().get_internals()
                cse_sym_internals = cse_exec.get_optimized_symbol().get_internals()
                # test that the graph has been simplified as expected
                assert (len(cse_sym_internals) + expected_savings) == len(orig_sym_internals)

    a = mx.sym.Variable('a')
    b = mx.sym.Variable('b')
    c = mx.sym.Variable('c')
    shape = rand_shape_nd(2)
    arr1 = mx.random.uniform(shape=shape)
    arr2 = mx.random.uniform(shape=shape)
    arr3 = mx.random.uniform(shape=shape)

    check_cse_on_symbol((a+1) + (a+2), expected_savings=0, check_data=True, a=arr1, b=arr2)
    check_cse_on_symbol((a+b) + (a+b), expected_savings=1, check_data=True, a=arr1, b=arr2)
    check_cse_on_symbol(((a+b)+c) +((a+b)+c), expected_savings=2, check_data=True,
                                                                  a=arr1, b=arr2, c=arr3)
    d = a + 1

    # a*d node gets eliminated, but then a copy is inserted to isolate the outputs, so no net gain.
    check_cse_on_symbol(mx.sym.Group([a*d, a*d]), expected_savings=0, check_data=True, a=arr1)

    # a*d node gets eliminated, then the duplicated add-of-b, but then a copy is added for net of 1.
    check_cse_on_symbol(mx.sym.Group([a*d+b, a*d+b]), expected_savings=1, check_data=True,
                                                                          a=arr1, b=arr2)

    # dropout uses a resource that precludes any optimization
    check_cse_on_symbol(mx.sym.Dropout(a) +
                        mx.sym.Dropout(a), expected_savings=0, check_data=False, a=arr1)
Example #16
def gen(dimensions):
    shape = rand_shape_nd(dimensions, 4)
    nelems = reduce(mul, shape)
    x = nd.arange(nelems).reshape(shape)
    return x
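
A hypothetical usage of gen, with the imports its body assumes spelled out (rand_shape_nd from mxnet.test_utils, reduce and mul from the standard library, nd from mxnet):

from functools import reduce
from operator import mul
from mxnet import nd
from mxnet.test_utils import rand_shape_nd

x = gen(3)  # e.g. shape (2, 4, 1) with values 0 .. nelems-1 in row-major order
assert x.size == reduce(mul, x.shape, 1)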
Example #17
def test_np_sum():
    class TestSum(HybridBlock):
        def __init__(self, axis=None, dtype=None, keepdims=False):
            super(TestSum, self).__init__()
            self._axis = axis
            self._dtype = dtype
            self._keepdims = keepdims

        def hybrid_forward(self, F, a, *args, **kwargs):
            return F.np.sum(a,
                            axis=self._axis,
                            dtype=self._dtype,
                            keepdims=self._keepdims)

    def is_int(dtype):
        return 'int' in dtype

    in_data_dim = random.choice([2, 3, 4])
    shape = rand_shape_nd(in_data_dim, dim=3)
    acc_type = {
        'float16': 'float32',
        'float32': 'float64',
        'float64': 'float64',
        'int8': 'int32',
        'int32': 'int64',
        'int64': 'int64'
    }
    for hybridize in [False, True]:
        for keepdims in [True, False]:
            for axis in ([i for i in range(in_data_dim)] + [(), None]):
                for itype in [
                        'float16', 'float32', 'float64', 'int8', 'int32',
                        'int64'
                ]:
                    for dtype in [
                            'float16', 'float32', 'float64', 'int8', 'int32',
                            'int64'
                    ]:
                        if is_int(dtype) and not is_int(itype):
                            continue
                        # test gluon
                        test_sum = TestSum(axis=axis,
                                           dtype=dtype,
                                           keepdims=keepdims)
                        if hybridize:
                            test_sum.hybridize()
                        if is_int(itype):
                            x = _np.random.randint(-128,
                                                   128,
                                                   shape,
                                                   dtype=itype)
                            x = mx.nd.array(x)
                        else:
                            x = mx.nd.random.uniform(-1.0,
                                                     1.0,
                                                     shape=shape,
                                                     dtype=itype)
                        x = x.as_np_ndarray()
                        x.attach_grad()
                        expected_ret = _np.sum(x.asnumpy(),
                                               axis=axis,
                                               dtype=acc_type[itype],
                                               keepdims=keepdims)
                        expected_ret = expected_ret.astype(dtype)
                        with mx.autograd.record():
                            y = test_sum(x)
                        assert y.shape == expected_ret.shape
                        assert_almost_equal(y.asnumpy(),
                                            expected_ret,
                                            rtol=1e-3,
                                            atol=1e-5,
                                            use_broadcast=False)

                        y.backward()
                        assert same(x.grad.asnumpy(),
                                    _np.ones(shape=x.shape, dtype=x.dtype))

                        # test numeric
                        if itype == 'float32' and dtype == 'float32':
                            x_sym = mx.sym.Variable("x").as_np_ndarray()
                            mx_sym = mx.sym.np.sum(
                                x_sym,
                                axis=axis,
                                dtype=dtype,
                                keepdims=keepdims).as_nd_ndarray()
                            check_numeric_gradient(mx_sym, [x.as_nd_ndarray()],
                                                   numeric_eps=1e-3,
                                                   rtol=1e-3,
                                                   atol=1e-4,
                                                   dtype=_np.float32)

                        # test imperative
                        mx_out = np.sum(x,
                                        axis=axis,
                                        dtype=dtype,
                                        keepdims=keepdims)
                        np_out = _np.sum(x.asnumpy(),
                                         axis=axis,
                                         dtype=acc_type[itype],
                                         keepdims=keepdims).astype(dtype)
                        assert_almost_equal(mx_out.asnumpy(),
                                            np_out,
                                            rtol=1e-3,
                                            atol=1e-5,
                                            use_broadcast=False)
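
The acc_type table reflects that reductions accumulate in a wider type before casting to the requested dtype. A NumPy illustration of why that matters (exact deltas vary with the random draw):

import numpy as _np

x = _np.random.uniform(size=100000).astype(_np.float16)
ref = float(_np.sum(x, dtype=_np.float64))   # high-precision reference
wide = float(_np.sum(x, dtype=_np.float32))  # accumulate in float32
naive = float(_np.sum(x))                    # float16 accumulation
print(abs(wide - ref), abs(naive - ref))     # naive error is typically far larger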