Example #1
File: l2_norm.py  Project: zheng-da/tvm
def l2norm_instance(data, eps, axis=None):
    """Perform L2norm on the input data

    For axis=None, y(i, j) = x(i, j) / sqrt(max(sum(x^2), eps))

    Parameters
    ----------
    data : tvm.Tensor
        4-D with NCHW or NHWC layout

    eps : float
        epsilon value

    axis : list of int
        axes over which the normalization is applied

    Returns
    -------
    output : tvm.Tensor
        4-D output with same shape
    """
    assert len(data.shape) == 4, "only support 4-dim l2norm"
    dot_value = topi.cpp.pow(data, 2.0)
    sum_value = topi.sum(dot_value, axis=axis, keepdims=True)
    expand_sum = topi.broadcast_to(sum_value, data.shape)
    return topi.broadcast_div(
        data,
        topi.sqrt(tvm.compute(
            expand_sum.shape,
            lambda i, j, k, l: tvm.max(expand_sum[i, j, k, l], eps),
            tag='l2norm')))
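For reference, the docstring formula y = x / sqrt(max(sum(x^2), eps)) can be checked against plain NumPy. This is a minimal sketch added for illustration; the name l2norm_ref is not part of the original project:

import numpy as np

def l2norm_ref(data, eps, axis=None):
    # NumPy reference for y = x / sqrt(max(sum(x^2, axis), eps))
    sqr_sum = np.sum(np.square(data), axis=axis, keepdims=True)
    return data / np.sqrt(np.maximum(sqr_sum, eps))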
Example #2
def verify_broadcast_to_ele(in_shape, out_shape):
    # Build the logic and compile the function
    A = tvm.placeholder(shape=in_shape, name="A")
    B = topi.broadcast_to(A, out_shape)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(B)
        ctx = tvm.context(device, 0)
        foo = tvm.build(s, [A, B], device, name="broadcast_to")
        data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
        out_npy = np.broadcast_to(data_npy, out_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), ctx)
        for _ in range(1):
            foo(data_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    check_device("opencl")
    check_device("cuda")
    check_device("metal")
    check_device("rocm")
Example #3
def verify_broadcast_to_ele(in_shape, out_shape):
    # Build the logic and compile the function
    A = tvm.placeholder(shape=in_shape, name="A")
    B = topi.broadcast_to(A, out_shape)
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(B)
        foo = tvm.build(s, [A, B], device, name="broadcast_to")
        data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
        out_npy = np.broadcast_to(data_npy, out_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), ctx)
        for _ in range(1):
            foo(data_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    check_device("vulkan")
    check_device("opencl")
    check_device("cuda")
    check_device("metal")
    check_device("rocm")
Example #4
def make_broadcast_to(shape, to_shape, tgt, tgt_host, func_name,
                      dtype="float32"):
    A = tvm.placeholder(shape, dtype=dtype, name="A")
    C = topi.broadcast_to(A, to_shape)

    s = tvm.create_schedule(C.op)
    f = tvm.build(s, [A, C], tgt, target_host=tgt_host, name=func_name)
    return f
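A minimal CPU-side use of the helper above; the llvm target, shapes, and function name are assumptions for illustration, and tvm/topi are assumed to be imported as in the snippet:

import numpy as np

f = make_broadcast_to((1, 3), (2, 3), "llvm", "llvm", "broadcast_to")
a_np = np.random.uniform(size=(1, 3)).astype("float32")
a = tvm.nd.array(a_np)                                  # defaults to cpu(0)
c = tvm.nd.array(np.zeros((2, 3), dtype="float32"))
f(a, c)
np.testing.assert_allclose(c.asnumpy(), np.broadcast_to(a_np, (2, 3)))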
Example #6
def make_broadcast_to(shape, to_shape, tgt, tgt_host, func_name,
                      dtype="float32"):
    A = te.placeholder(shape, dtype=dtype, name="A")
    C = topi.broadcast_to(A, to_shape)

    s = te.create_schedule(C.op)
    if tgt == "cuda":
        # split the second axis and bind the pieces to CUDA block/thread indices
        bx, tx = s[C].split(C.op.axis[1], factor=32)
        s[C].bind(bx, te.thread_axis("blockIdx.x"))
        s[C].bind(tx, te.thread_axis("threadIdx.x"))
        # print(tvm.lower(s, [A, C], simple_mode=True))

    f = tvm.build(s, [A, C], tgt, target_host=tgt_host, name=func_name)
    return f
Example #8
File: transform.py  Project: bddppq/tvm
def compute_expand_like(attrs, inputs, _):
    """Compute definition of expand_like"""
    if len(inputs[0].shape) == len(inputs[1].shape):
        # If the number of dimensions is unchanged, this is just a broadcast
        return topi.broadcast_to(inputs[0], inputs[1].shape)

    exclude = attrs.get_bool("exclude")
    axis = attrs.get_int_tuple("axis")
    if exclude:
        exclude_axis = (axis,) if isinstance(axis, int) else axis
        axis = []
        for item in range(len(inputs[1].shape)):
            if item not in exclude_axis:
                axis.append(item)
        axis = tuple(axis)

    return topi.transform.expand_like(inputs[0], inputs[1], axis)
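As a rough illustration of the non-broadcast branch, assuming expand_like inserts length-1 axes at the positions given by axis and then broadcasts to the target shape (a NumPy sketch, not the TOPI implementation):

import numpy as np

def expand_like_ref(src, shape_like, axis):
    # insert singleton dimensions at the requested output axes, then broadcast
    out = src
    for ax in sorted(axis):
        out = np.expand_dims(out, ax)
    return np.broadcast_to(out, shape_like.shape)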
Example #9
def test_broadcast_to(in_shape, out_shape):
    global TASK
    TASK = "bcast_to_i" + "_".join([str(ele) for ele in in_shape])\
           + "o" + "_".join([str(ele) for ele in out_shape])
    # Build the logic and compile the function
    A = tvm.placeholder(shape=in_shape, name="A")
    B = topi.broadcast_to(A, out_shape)
    s = topi.cuda.schedule_broadcast(B)
    fcuda = tvm.build(s, [A, B], "cuda", name="broadcast_to")

    data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
    out_npy = np.broadcast_to(data_npy, out_shape)

    data_nd = tvm.nd.array(data_npy, tvm.gpu())
    out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), tvm.gpu())
    for _ in range(2):
        fcuda(data_nd, out_nd)
    np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
Example #10
def make_broadcast_to(shape,
                      to_shape,
                      tgt,
                      tgt_host,
                      func_name,
                      dtype="float32"):
    A = tvm.placeholder(shape, dtype=dtype, name="A")
    C = topi.broadcast_to(A, to_shape)

    s = tvm.create_schedule(C.op)

    block_x = tvm.thread_axis("blockIdx.x")
    thread_x = tvm.thread_axis("threadIdx.x")

    s[C].bind(C.op.axis[0], block_x)
    if len(to_shape) > 1:
        s[C].bind(C.op.axis[1], thread_x)

    f = tvm.build(s, [A, C], tgt, target_host=tgt_host, name=func_name)

    # `_export_module` and `remote` are provided by the enclosing test harness in the source project
    return _export_module(f, func_name, remote)
Example #11
File: topi0.py  Project: ryansoq/nixtvm
def demo_broadcast():
    """ Check that broad works as expected """

    num_classes = 10
    batch_size = 1
    img_h = 28
    img_w = 28
    img_c = 1

    f1_c = 1

    x = tvm.placeholder((batch_size, img_h, img_w, img_c), name='x')
    b = tvm.placeholder((img_c, ), name='b')

    # The addition below broadcasts automatically
    y = x + topi.broadcast_to(b, (batch_size, 1, 1, img_c))

    npy = run_tvm(
        0, 1, {
            x: np.ones(get_shape(x)).astype(np.float32),
            b: np.ones(get_shape(b)).astype(np.float32)
        }, y)

    print(npy.last_data[0, :, :, 0])
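The broadcast bias add in this demo mirrors ordinary NumPy broadcasting; the following sketch uses the same shapes purely for comparison:

import numpy as np

x_np = np.ones((1, 28, 28, 1), dtype=np.float32)
b_np = np.ones((1,), dtype=np.float32)
# expand b to (1, 1, 1, 1) and let NumPy broadcasting do the rest,
# mirroring x + topi.broadcast_to(b, (batch_size, 1, 1, img_c))
y_np = x_np + np.broadcast_to(b_np, (1, 1, 1, 1))
print(y_np[0, :, :, 0])  # every entry is 2.0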
Example #12
File: tensor.py  Project: vkandola/nnvm
def compute_softmax(attrs, inputs, out_info):
    """Compute definition of softmax"""
    return topi.broadcast_to(inputs[0], shape=out_info[0].shape)
Example #13
def demo_conv2d():
    lrate = 0.1
    nbatches = 100  # batches to train

    num_classes = 10
    batch_size = 10
    img_h = 28
    img_w = 28
    img_c = 1

    f1_c = 4
    f2_c = 5
    f3_units = 16

    x = tvm.placeholder((batch_size, img_h, img_w, img_c), name='x')
    y = tvm.placeholder((batch_size, num_classes), name='y')

    print('Block1')
    w1 = tvm.placeholder((3, 3, img_c, f1_c), name='w1')
    b1 = tvm.placeholder((f1_c, ), name='b1')
    t = topi.nn.conv2d(x, w1, 1, 0, layout='NHWC', out_dtype=tvm.float32)
    t = t + topi.broadcast_to(b1, (batch_size, 1, 1, f1_c))
    print('Block1: after-biasing shape is', get_shape(t))
    t = topi.nn.pool(t, [2, 2], [2, 2], [0, 0, 0, 0], 'max', layout='NHWC')
    print('Block1: after-pooling shape is', get_shape(t))
    t = topi.nn.relu(t)
    print('Block1: after-relu shape is', get_shape(t))

    print('Block2')
    w2 = tvm.placeholder((3, 3, f1_c, f2_c), name='w2')
    b2 = tvm.placeholder((f2_c, ), name='b2')
    t = topi.nn.conv2d(t, w2, 1, 0, layout='NHWC', out_dtype=tvm.float32)
    t = t + topi.broadcast_to(b2, (batch_size, 1, 1, f2_c))
    print('Block2: after-biasing shape is', get_shape(t))
    t = topi.nn.pool(t, [2, 2], [2, 2], [0, 0, 0, 0], 'max', layout='NHWC')
    print('Block2: after-pooling shape is', get_shape(t))
    t = topi.nn.relu(t)
    print('Block2: after-relu shape is', get_shape(t))
    t = topi.nn.flatten(t)
    print('Block2: after-flatten shape is', get_shape(t))

    print('Block3')
    w3 = tvm.placeholder((f3_units, get_shape(t)[1]))
    b3 = tvm.placeholder((f3_units, ))
    t = topi.nn.dense(t, w3, b3)
    print('Block3: after-dense shape is', get_shape(t))

    print('Block4')
    w4 = tvm.placeholder((num_classes, get_shape(t)[1]))
    b4 = tvm.placeholder((num_classes, ))
    t = topi.nn.dense(t, w4, b4)
    print('Block4: after-dense shape is', get_shape(t))
    t = topi.nn.relu(t)

    p = topi.argmax(t, axis=1)
    # TODO: check the correctness of the log_softmax expression
    # TODO: figure out the difference between it and standard cross-entropy loss
    l = -topi.sum(y * topi.nn.log_softmax(t)) / batch_size
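    # Note (added): with log_softmax(t)[i, j] = t[i, j] - log(sum_k exp(t[i, k])),
    # the line above is the mean cross-entropy between the one-hot labels y and
    # softmax(t): l = -(1/batch_size) * sum_{i,j} y[i, j] * log_softmax(t)[i, j]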

    print('Block4: loss shape is', get_shape(l))

    ones = topi.full_like(l, 1.0)
    #[dl_dw1,dl_db1,dl_dw2,dl_db2,dl_dw3,dl_db3,dl_dw4,dl_db4]
    params = [w1, b1, w2, b2, w3, b3, w4, b4]

    dl = list(tvm.ir_pass.JacobianRecursive(l, params, ones))
    assert len(params) == len(dl)
    print('dl_dw1 weight is', get_shape(params[0]))

    sdl = tvm.create_schedule([p.op for p in [x, y, l] + params + dl])
    mdl = tvm.build(sdl, [x, y, l] + params + dl)
    print('Train+Inference module', mdl)

    # sl = tvm.create_schedule([l.op])
    # ml = tvm.build(sdl, [x,y] + params + [l])
    # print('Inference module',ml)

    state = {}
    for p in params:
        state.update({
            p:
            tvm.nd.array(
                np.random.uniform(-1.0, 1.0,
                                  size=get_shape(p)).astype(np.float32))
        })

    grads = {}
    for p, g in zip(params, dl):
        grads.update({p: tvm.nd.empty(get_shape(g))})

    for ib in range(nbatches):
        b = range(ib * batch_size, (ib + 1) * batch_size)
        tx = tvm.nd.array(mnist_img(b))
        ty = tvm.nd.array(mnist_cls_oh(b))
        tl = tvm.nd.empty(shape=(), dtype=tvm.float32)

        print('Entering')
        mdl(*([tx, ty, tl] + list(state.values()) + list(grads.values())))
        print('Done', 'loss', tl.asnumpy())

        state2 = {}
        for p in params:
            state2.update({
                p:
                tvm.nd.array(state[p].asnumpy() - lrate * grads[p].asnumpy())
            })

        state = state2