def verify_broadcast_binary_ele(lhs_shape, rhs_shape, typ="add"):
    # Build the logic and compile the function
    A = tvm.placeholder(shape=lhs_shape, name="A")
    B = tvm.placeholder(shape=rhs_shape, name="B")
    if typ == "add":
        C = topi.broadcast_add(A, B)
    elif typ == "sub":
        C = topi.broadcast_sub(A, B)
    elif typ == "div":
        C = topi.broadcast_div(A, B)
    elif typ == "mul":
        C = topi.broadcast_mul(A, B)
    elif typ == "maximum":
        C = topi.broadcast_maximum(A, B)
    elif typ == "minimum":
        C = topi.broadcast_minimum(A, B)
    else:
        raise NotImplementedError

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(C)
        ctx = tvm.context(device, 0)
        foo = tvm.build(s, [A, B, C],
                        device,
                        name="broadcast_binary" + "_" + typ)
        lhs_npy = np.random.uniform(size=lhs_shape).astype(A.dtype)
        rhs_npy = np.random.uniform(size=rhs_shape).astype(A.dtype)
        if typ == "add":
            out_npy = lhs_npy + rhs_npy
        elif typ == "sub":
            out_npy = lhs_npy - rhs_npy
        elif typ == "div":
            rhs_npy = np.abs(rhs_npy) + 0.001
            out_npy = lhs_npy / rhs_npy
        elif typ == "mul":
            out_npy = lhs_npy * rhs_npy
        elif typ == "maximum":
            out_npy = np.maximum(lhs_npy, rhs_npy)
        elif typ == "minimum":
            out_npy = np.minimum(lhs_npy, rhs_npy)
        else:
            raise NotImplementedError
        lhs_nd = tvm.nd.array(lhs_npy, ctx)
        rhs_nd = tvm.nd.array(rhs_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), ctx)
        for _ in range(1):
            foo(lhs_nd, rhs_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(),
                                   out_npy,
                                   rtol=1E-4,
                                   atol=1E-4)

    check_device("opencl")
    check_device("cuda")
    check_device("metal")
    check_device("rocm")
Example #2
def compute_conv2d(attrs, inputs, _):
    """Compute definition of conv2d"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    layout = attrs["layout"]
    assert layout == "NCHW" or layout == "NHWC"
    (dilation_h, dilation_w) = dilation
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be positive value")
    elif dilation == (1, 1):
        kernel = inputs[1]
    elif layout == "NCHW":
        kernel = topi.nn.dilate(inputs[1], [1, 1, dilation_h, dilation_w])
    else:  # layout == "NHWC"
        kernel = topi.nn.dilate(inputs[1], [1, dilation_h, dilation_w, 1])

    if groups == 1:
        out = topi.nn.conv2d(inputs[0], kernel, strides, padding, layout)
    elif groups == get_const_int(inputs[0].shape[1]) and groups == channels:
        out = topi.nn.depthwise_conv2d_nchw(inputs[0], kernel, strides,
                                            padding)
    else:
        raise ValueError("not support arbitrary group number for now")
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        expand_axis = 1 if layout == "NCHW" else 0
        bias = topi.expand_dims(bias, axis=expand_axis, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    return out
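
The bias path above relies on topi broadcasting: a `(channels,)` bias gets two extra unit axes so it lines up with the conv output. A minimal sketch of that pattern for NCHW (hypothetical shapes, using the same pre-`te` `tvm.placeholder` API as these examples):

    out = tvm.placeholder((1, 16, 8, 8), name="conv_out")    # NCHW conv output
    bias = tvm.placeholder((16,), name="bias")                # one value per output channel
    bias = topi.expand_dims(bias, axis=1, num_newaxis=2)      # (16,) -> (16, 1, 1)
    out = topi.broadcast_add(out, bias)                       # broadcasts to (1, 16, 8, 8)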
Example #3
def verify_broadcast_binary_ele(lhs_shape, rhs_shape, typ="add"):
    # Build the logic and compile the function
    A = tvm.placeholder(shape=lhs_shape, name="A")
    B = tvm.placeholder(shape=rhs_shape, name="B")
    if typ == "add":
        C = topi.broadcast_add(A, B)
    elif typ == "sub":
        C = topi.broadcast_sub(A, B)
    elif typ == "div":
        C = topi.broadcast_div(A, B)
    elif typ == "mul":
        C = topi.broadcast_mul(A, B)
    elif typ == "maximum":
        C = topi.broadcast_maximum(A, B)
    elif typ == "minimum":
        C = topi.broadcast_minimum(A, B)
    else:
        raise NotImplementedError
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(C)
        foo = tvm.build(s, [A, B, C], device, name="broadcast_binary" + "_" + typ)
        lhs_npy = np.random.uniform(size=lhs_shape).astype(A.dtype)
        rhs_npy = np.random.uniform(size=rhs_shape).astype(A.dtype)
        if typ == "add":
            out_npy = lhs_npy + rhs_npy
        elif typ == "sub":
            out_npy = lhs_npy - rhs_npy
        elif typ == "div":
            rhs_npy = np.abs(rhs_npy) + 0.001
            out_npy = lhs_npy / rhs_npy
        elif typ == "mul":
            out_npy = lhs_npy * rhs_npy
        elif typ == "maximum":
            out_npy = np.maximum(lhs_npy, rhs_npy)
        elif typ == "minimum":
            out_npy = np.minimum(lhs_npy, rhs_npy)
        else:
            raise NotImplementedError
        lhs_nd = tvm.nd.array(lhs_npy, ctx)
        rhs_nd = tvm.nd.array(rhs_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), ctx)
        for _ in range(1):
            foo(lhs_nd, rhs_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)

    check_device("vulkan")
    check_device("opencl")
    check_device("cuda")
    check_device("metal")
    check_device("rocm")
Example #4
def test_broadcast_binary_op(lhs_shape, rhs_shape, typ="add"):
    global TASK
    TASK = "bcast_binary_" + typ + "_lhs" +\
           "_".join([str(ele) for ele in lhs_shape]) +\
           "rhs" + "_".join([str(ele) for ele in rhs_shape])
    A = tvm.te.placeholder(shape=lhs_shape, name="A")
    B = tvm.te.placeholder(shape=rhs_shape, name="B")
    if typ == "add":
        C = topi.broadcast_add(A, B)
    elif typ == "sub":
        C = topi.broadcast_sub(A, B)
    elif typ == "div":
        C = topi.broadcast_div(A, B)
    elif typ == "mul":
        C = topi.broadcast_mul(A, B)
    elif typ == "maximum":
        C = topi.broadcast_maximum(A, B)
    elif typ == "minimum":
        C = topi.broadcast_minimum(A, B)
    else:
        raise NotImplementedError
    s = topi.cuda.schedule_broadcast(C)
    fcuda = tvm.build(s, [A, B, C],
                      "cuda",
                      name="broadcast_binary" + "_" + typ)

    lhs_npy = np.random.uniform(size=lhs_shape).astype(A.dtype)
    rhs_npy = np.random.uniform(size=rhs_shape).astype(A.dtype)
    if typ == "add":
        out_npy = lhs_npy + rhs_npy
    elif typ == "sub":
        out_npy = lhs_npy - rhs_npy
    elif typ == "div":
        rhs_npy = np.abs(rhs_npy) + 0.001
        out_npy = lhs_npy / rhs_npy
    elif typ == "mul":
        out_npy = lhs_npy * rhs_npy
    elif typ == "maximum":
        out_npy = np.maximum(lhs_npy, rhs_npy)
    elif typ == "minimum":
        out_npy = np.minimum(lhs_npy, rhs_npy)
    lhs_nd = tvm.nd.array(lhs_npy, tvm.gpu())
    rhs_nd = tvm.nd.array(rhs_npy, tvm.gpu())
    out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), tvm.gpu())
    for _ in range(2):
        fcuda(lhs_nd, rhs_nd, out_nd)
    tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
Example #5
def test_broadcast_binary_op(lhs_shape, rhs_shape, typ="add"):
    global TASK
    TASK = "bcast_binary_" + typ + "_lhs" +\
           "_".join([str(ele) for ele in lhs_shape]) +\
           "rhs" + "_".join([str(ele) for ele in rhs_shape])
    A = tvm.placeholder(shape=lhs_shape, name="A")
    B = tvm.placeholder(shape=rhs_shape, name="B")
    if typ == "add":
        C = topi.broadcast_add(A, B)
    elif typ == "sub":
        C = topi.broadcast_sub(A, B)
    elif typ == "div":
        C = topi.broadcast_div(A, B)
    elif typ == "mul":
        C = topi.broadcast_mul(A, B)
    elif typ == "maximum":
        C = topi.broadcast_maximum(A, B)
    elif typ == "minimum":
        C = topi.broadcast_minimum(A, B)
    else:
        raise NotImplementedError
    s = topi.cuda.schedule_broadcast(C)
    fcuda = tvm.build(s, [A, B, C], "cuda", name="broadcast_binary" + "_" + typ)

    lhs_npy = np.random.uniform(size=lhs_shape).astype(A.dtype)
    rhs_npy = np.random.uniform(size=rhs_shape).astype(A.dtype)
    if typ == "add":
        out_npy = lhs_npy + rhs_npy
    elif typ == "sub":
        out_npy = lhs_npy - rhs_npy
    elif typ == "div":
        rhs_npy = np.abs(rhs_npy) + 0.001
        out_npy = lhs_npy / rhs_npy
    elif typ == "mul":
        out_npy = lhs_npy * rhs_npy
    elif typ == "maximum":
        out_npy = np.maximum(lhs_npy, rhs_npy)
    elif typ == "minimum":
        out_npy = np.minimum(lhs_npy, rhs_npy)
    lhs_nd = tvm.nd.array(lhs_npy, tvm.gpu())
    rhs_nd = tvm.nd.array(rhs_npy, tvm.gpu())
    out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), tvm.gpu())
    for _ in range(2):
        fcuda(lhs_nd, rhs_nd, out_nd)
    np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
Example #6
def compute_contrib_conv2d_NCHWc(attrs, inputs, _):
    """Compute definition of conv2d NCHWc"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    kh, kw = attrs.get_int_tuple('kernel_size')
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    assert dilation == (1, 1), "not support dilate now"
    if groups == 1:
        out = topi.nn.conv2d_NCHWc(inputs[0], inputs[1], channels, (kh, kw), strides, padding)
    else:
        raise ValueError("not support arbitrary group number > 1 for now")
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    return out
Example #7
def mxnet_check():
    """This is a simple test function for MXNet bridge

    It is not included as nosetests, because of its dependency on mxnet

    User can directly run this script to verify correctness.
    """
    import mxnet as mx
    import topi
    import tvm
    import numpy as np
    from tvm.contrib.mxnet import to_mxnet_func

    # build a TVM function through topi
    n = 20
    shape = (20,)
    scale = tvm.var("scale", dtype="float32")
    x = tvm.placeholder(shape)
    y = tvm.placeholder(shape)
    z = topi.broadcast_add(x, y)
    zz = tvm.compute(shape, lambda *i: z(*i) * scale)

    target = tvm.target.cuda()

    # build the function
    with target:
        s = topi.generic.schedule_injective(zz)
        f = tvm.build(s, [x, y, zz, scale])

    # get a mxnet version
    mxf = to_mxnet_func(f, const_loc=[0, 1])

    ctx = mx.gpu(0)
    xx = mx.nd.uniform(shape=shape, ctx=ctx)
    yy = mx.nd.uniform(shape=shape, ctx=ctx)
    zz = mx.nd.empty(shape=shape, ctx=ctx)

    # invoke mxf: this runs inside the MXNet engine
    mxf(xx, yy, zz, 10.0)
    mxf(xx, yy, zz, 10.0)


    tvm.testing.assert_allclose(
        zz.asnumpy(), (xx.asnumpy() + yy.asnumpy()) * 10)
Example #8
def mxnet_check():
    """This is a simple test function for MXNet bridge

    It is not included as pytests, because of its dependency on mxnet

    User can directly run this script to verify correctness.
    """
    import mxnet as mx
    import topi
    import tvm
    from tvm import te
    import numpy as np
    from tvm.contrib.mxnet import to_mxnet_func

    # build a TVM function through topi
    n = 20
    shape = (20, )
    scale = te.var("scale", dtype="float32")
    x = te.placeholder(shape)
    y = te.placeholder(shape)
    z = topi.broadcast_add(x, y)
    zz = te.compute(shape, lambda *i: z(*i) * scale)

    target = tvm.target.cuda()

    # build the function
    with target:
        s = topi.generic.schedule_injective(zz)
        f = tvm.build(s, [x, y, zz, scale])

    # get a mxnet version
    mxf = to_mxnet_func(f, const_loc=[0, 1])

    ctx = mx.gpu(0)
    xx = mx.nd.uniform(shape=shape, ctx=ctx)
    yy = mx.nd.uniform(shape=shape, ctx=ctx)
    zz = mx.nd.empty(shape=shape, ctx=ctx)

    # invoke mxf: this runs inside the MXNet engine
    mxf(xx, yy, zz, 10.0)
    mxf(xx, yy, zz, 10.0)

    tvm.testing.assert_allclose(zz.asnumpy(),
                                (xx.asnumpy() + yy.asnumpy()) * 10)
Example #9
def compute_conv2d_transpose(attrs, inputs, _):
    """Compute definition of conv2d_transpose"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    layout = attrs["layout"]
    assert layout == "NCHW", "only support nchw for now"
    assert dilation == (1, 1), "not support dilate now"
    assert groups == 1, "only support groups == 1 for now"
    out = topi.nn.conv2d_transpose_nchw(inputs[0], inputs[1], strides, padding)
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    output_padding = attrs.get_int_tuple("output_padding")
    out = topi.nn.pad(out, \
        [0, 0, 0, 0], [0, 0, output_padding[0], output_padding[1]])
    return out
Example #10
File: nn.py  Project: masa-ito-fj/nnvm
def compute_conv2d_transpose(attrs, inputs, _):
    """Compute definition of conv2d_transpose"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    layout = attrs["layout"]
    assert layout == "NCHW", "only support nchw for now"
    assert dilation == (1, 1), "not support dilate now"
    assert groups == 1, "only support groups == 1 for now"
    out = topi.nn.conv2d_transpose_nchw(inputs[0], inputs[1], strides, padding)
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    output_padding = attrs.get_int_tuple("output_padding")
    out = topi.nn.pad(out, \
        [0, 0, 0, 0], [0, 0, output_padding[0], output_padding[1]])
    return out
Example #11
File: nn.py  Project: fatu/nnvm
def compute_conv2d(attrs, inputs, _):
    """Compute definition of conv2d"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    layout = attrs["layout"]
    assert layout == "NCHW", "only support nchw for now"
    assert dilation == (1, 1), "not support dilate now"
    if groups == 1:
        out = topi.nn.conv2d(inputs[0], inputs[1], strides, padding)
    elif groups == get_const_int(inputs[0].shape[1]) and groups == channels:
        out = topi.nn.depthwise_conv2d_nchw(inputs[0], inputs[1], strides, padding)
    else:
        raise ValueError("not support arbitrary group number for now")
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    return out
Example #12
File: nn.py  Project: masa-ito-fj/nnvm
def compute_conv2d(attrs, inputs, _):
    """Compute definition of conv2d"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    layout = attrs["layout"]
    assert layout == "NCHW", "only support nchw for now"
    assert dilation == (1, 1), "not support dilate now"
    if groups == 1:
        out = topi.nn.conv2d(inputs[0], inputs[1], strides, padding)
    elif groups == get_const_int(inputs[0].shape[1]) and groups == channels:
        out = topi.nn.depthwise_conv2d_nchw(inputs[0], inputs[1], strides, padding)
    else:
        raise ValueError("not support arbitrary group number for now")
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    return out