예제 #1
0
 def intrin_func(ins, outs):
     """Build the packed-call statements of the GEMV tensor intrinsic.

     The first two input buffers are accessed for reading and the first
     output buffer for writing.  ``m``, ``n`` and ``l`` are taken from the
     enclosing scope.

     Returns
     -------
     tuple
         ``(body, reset, update)``: the "gemv" call, the "fill_zero" call and
         the "gemv_add" call, in that order.
     """
     lhs = ins[0].access_ptr("r")
     rhs = ins[1].access_ptr("r")
     dst = outs[0].access_ptr("w")
     compute_stmt = tvm.call_packed("gemv", lhs, rhs, dst, m, n, l)
     clear_stmt = tvm.call_packed("fill_zero", dst, m, n)
     accumulate_stmt = tvm.call_packed("gemv_add", lhs, rhs, dst, m, n, l)
     return compute_stmt, clear_stmt, accumulate_stmt
예제 #2
0
 def intrin_func(ins, outs):
     """Build the compute and accumulate statements of the GEMV intrinsic.

     ``ins`` must unpack into exactly two buffers; ``n`` comes from the
     enclosing scope.

     Returns
     -------
     tuple
         ``(body, None, update)``: the "gemv" call, no reset statement, and
         the "gemv_add" call.
     """
     weights, vec = ins
     result = outs[0]
     w_ptr = weights.access_ptr("r")
     v_ptr = vec.access_ptr("r")
     r_ptr = result.access_ptr("w")
     # Both packed calls receive the same leading stride of the first input.
     row_stride = weights.strides[0]
     compute_stmt = tvm.call_packed("gemv", w_ptr, v_ptr, r_ptr, n, row_stride)
     accumulate_stmt = tvm.call_packed("gemv_add", w_ptr, v_ptr, r_ptr, n,
                                       row_stride)
     return compute_stmt, None, accumulate_stmt
예제 #3
0
 def intrin_func(ins, outs):
     """Return ``(body, None, update)`` packed calls for the GEMV intrinsic.

     ``ins`` must unpack into exactly two buffers.  The body calls "gemv" and
     the update calls "gemv_add"; both receive the same pointers, ``n`` from
     the enclosing scope and the leading stride of the first input.
     """
     ww, xx = ins
     w_ptr = ww.access_ptr("r")
     x_ptr = xx.access_ptr("r")
     z_ptr = outs[0].access_ptr("w")

     def packed(func_name):
         # The two statements differ only in the packed function they call.
         return tvm.call_packed(func_name, w_ptr, x_ptr, z_ptr, n,
                                ww.strides[0])

     return packed("gemv"), None, packed("gemv_add")
예제 #4
0
def test_sort_np():
    """Compare the packed "tvm.contrib.sort.argsort" call with ``np.argsort``.

    Sorts random 6-D data along axis 4 in ascending order, with every
    ``sort_num`` entry set to the full length of that axis.
    """
    shape = (1, 2, 3, 4, 5, 6)
    sort_axis = 4
    shape_without_axis = (1, 2, 3, 4, 6)
    descending = False
    data = tvm.placeholder(shape, name='data')
    sort_num = tvm.placeholder(shape_without_axis,
                               name="sort_num",
                               dtype="int32")

    def emit_argsort(ins, outs):
        return tvm.call_packed("tvm.contrib.sort.argsort", ins[0], ins[1],
                               outs[0], sort_axis, descending)

    out = tvm.extern(data.shape, [data, sort_num],
                     emit_argsort,
                     dtype='int32',
                     name="sort_tensor")

    ctx = tvm.cpu(0)
    sched = tvm.create_schedule(out.op)
    func = tvm.build(sched, [data, sort_num, out], "llvm")

    np_data = np.random.uniform(size=shape)
    expected = np.argsort(np_data, axis=sort_axis)
    full_lengths = np.full(shape_without_axis, shape[sort_axis])
    data_nd = tvm.nd.array(np.array(np_data).astype(data.dtype), ctx)
    num_nd = tvm.nd.array(np.array(full_lengths).astype(sort_num.dtype), ctx)
    out_nd = tvm.nd.array(np.zeros(data_nd.shape, dtype=out.dtype), ctx)
    func(data_nd, num_nd, out_nd)
    tvm.testing.assert_allclose(out_nd.asnumpy(), expected, rtol=1e-5)
예제 #5
0
def test_sort():
    """Run the packed argsort on a hand-written tensor and check the indices.

    Sorts a (2, 5, 3) tensor along axis 1 in descending order and compares
    the result with a hard-coded index table.
    """
    batch, length, width = 2, 5, 3
    data = tvm.placeholder((batch, length, width), name='data')
    sort_num = tvm.placeholder((batch, width), name="sort_num", dtype="int32")
    sort_axis = 1
    descending = True

    def emit_argsort(ins, outs):
        return tvm.call_packed("tvm.contrib.sort.argsort", ins[0], ins[1],
                               outs[0], sort_axis, descending)

    out = tvm.extern(data.shape, [data, sort_num],
                     emit_argsort,
                     dtype='int32',
                     name="sort_tensor")
    values = [
        [[1, 2, 3], [2, 4.5, 3.5], [1.1, 0.5, 1], [3.2, -5, 0.5], [1.5, 0, 0]],
        [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]],
    ]
    valid_lengths = [[1, 2, 3], [4, 5, 5]]
    expected_index = [
        [[0, 1, 1], [1, 0, 0], [2, 2, 2], [3, 3, 3], [4, 4, 4]],
        [[3, 4, 4], [2, 3, 3], [1, 2, 2], [0, 1, 1], [4, 0, 0]],
    ]

    ctx = tvm.cpu(0)
    sched = tvm.create_schedule(out.op)
    func = tvm.build(sched, [data, sort_num, out], "llvm")
    data_nd = tvm.nd.array(np.array(values).astype(data.dtype), ctx)
    num_nd = tvm.nd.array(np.array(valid_lengths).astype(sort_num.dtype), ctx)
    out_nd = tvm.nd.array(np.zeros(data_nd.shape, dtype=out.dtype), ctx)
    func(data_nd, num_nd, out_nd)
    expected = np.array(expected_index).astype(out.dtype)
    tvm.testing.assert_allclose(out_nd.asnumpy(), expected, rtol=1e-5)
예제 #6
0
파일: sort.py 프로젝트: Manikant92/tvm
def topk(data, k=1, axis=-1, ret_type="both", is_ascend=False, dtype="int64"):
    """Get the top k elements in an input tensor along the given axis.

    The work is delegated to the packed function "tvm.contrib.sort.topk"
    through an extern op.

    Parameters
    ----------
    data : tvm.Tensor
        The input tensor.

    k : int, optional
        Number of top elements to select. Return all elements if k < 1.

    axis : int, optional
        Axis along which to sort the input tensor.

    ret_type: str, optional
        The return type [both, values, indices].
        "both": return both top k data and indices.
        "values": return top k data only.
        "indices": return top k indices only.

    is_ascend : boolean, optional
        Whether to sort in ascending or descending order.

    dtype : string, optional
        The data type of the indices output.

    Returns
    -------
    out : tvm.Tensor or List[tvm.Tensor]
        The computed result.
    """
    assert ret_type in ["both", "values", "indices"]
    input_buffer = api.decl_buffer(data.shape, data.dtype, "data_buf",
                                   data_alignment=8)

    # Only the sorted axis changes size, and only when k selects a subset.
    result_shape = list(get_const_tuple(data.shape))
    if k >= 1:
        result_shape[axis] = k

    # Values come first and indices second when both are requested.
    result_buffers = []
    if ret_type in ["both", "values"]:
        value_buffer = api.decl_buffer(result_shape, data.dtype, "value_buf",
                                       data_alignment=8)
        result_buffers.append(value_buffer)
    if ret_type in ["both", "indices"]:
        index_buffer = api.decl_buffer(result_shape, dtype, "indices_buf",
                                       data_alignment=8)
        result_buffers.append(index_buffer)
    result_shapes = [result_shape for _ in result_buffers]

    def emit_topk(ins, outs):
        return tvm.call_packed("tvm.contrib.sort.topk", ins[0], *outs, k,
                               axis, ret_type, is_ascend)

    return tvm.extern(result_shapes, [data],
                      emit_topk,
                      in_buffers=[input_buffer],
                      out_buffers=result_buffers,
                      name="topk_cpu",
                      tag="topk_cpu")
예제 #7
0
파일: test_sort.py 프로젝트: LANHUIYING/tvm
def test_sort():
    """Check the packed "tvm.contrib.sort.argsort" call on fixed data.

    A (2, 5, 3) tensor is sorted along axis 1 in descending order and the
    resulting indices are compared with a hard-coded table.
    """
    dim0, dim1, dim2 = 2, 5, 3
    data = tvm.placeholder((dim0, dim1, dim2), name='data')
    sort_num = tvm.placeholder((dim0, dim2), name="sort_num", dtype="int32")
    axis = 1
    is_descend = True
    argsort_call = lambda ins, outs: tvm.call_packed(
        "tvm.contrib.sort.argsort", ins[0], ins[1], outs[0], axis, is_descend)
    out = tvm.extern(data.shape, [data, sort_num], argsort_call,
                     dtype='int32', name="sort_tensor")

    first_batch = [[1, 2, 3], [2, 4.5, 3.5], [1.1, 0.5, 1], [3.2, -5, 0.5],
                   [1.5, 0, 0]]
    second_batch = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12],
                    [13, 14, 15]]
    samples = [first_batch, second_batch]
    counts = [[1, 2, 3], [4, 5, 5]]
    want = [[[0, 1, 1], [1, 0, 0], [2, 2, 2], [3, 3, 3], [4, 4, 4]],
            [[3, 4, 4], [2, 3, 3], [1, 2, 2], [0, 1, 1], [4, 0, 0]]]

    device = tvm.cpu(0)
    schedule = tvm.create_schedule(out.op)
    compiled = tvm.build(schedule, [data, sort_num, out], "llvm")
    arg_data = tvm.nd.array(np.array(samples).astype(data.dtype), device)
    arg_num = tvm.nd.array(np.array(counts).astype(sort_num.dtype), device)
    arg_out = tvm.nd.array(np.zeros(arg_data.shape, dtype=out.dtype), device)
    compiled(arg_data, arg_num, arg_out)
    tvm.testing.assert_allclose(arg_out.asnumpy(),
                                np.array(want).astype(out.dtype),
                                rtol=1e-5)
예제 #8
0
파일: test_sort.py 프로젝트: LANHUIYING/tvm
def test_sort_np():
    """Validate the packed argsort against ``np.argsort`` on random data.

    The data is 6-D, sorted along axis 4 in ascending order; ``sort_num`` is
    filled with the full length of the sorted axis.
    """
    dshape = (1, 2, 3, 4, 5, 6)
    axis = 4
    # ``dshape`` with the sorted axis removed.
    num_shape = (1, 2, 3, 4, 6)
    is_descend = False
    data = tvm.placeholder(dshape, name='data')
    sort_num = tvm.placeholder(num_shape, name="sort_num", dtype="int32")
    argsort_call = lambda ins, outs: tvm.call_packed(
        "tvm.contrib.sort.argsort", ins[0], ins[1], outs[0], axis, is_descend)
    out = tvm.extern(data.shape, [data, sort_num], argsort_call,
                     dtype='int32', name="sort_tensor")

    device = tvm.cpu(0)
    schedule = tvm.create_schedule(out.op)
    compiled = tvm.build(schedule, [data, sort_num, out], "llvm")

    samples = np.random.uniform(size=dshape)
    want = np.argsort(samples, axis=axis)
    counts = np.full(num_shape, dshape[axis])
    arg_data = tvm.nd.array(np.array(samples).astype(data.dtype), device)
    arg_num = tvm.nd.array(np.array(counts).astype(sort_num.dtype), device)
    arg_out = tvm.nd.array(np.zeros(arg_data.shape, dtype=out.dtype), device)
    compiled(arg_data, arg_num, arg_out)
    tvm.testing.assert_allclose(arg_out.asnumpy(), want, rtol=1e-5)
예제 #9
0
 def intrin_func(ins, outs, sp):
     """Check the intrinsic's arguments and emit the "hw_func" packed call.

     Asserts that the first input is a ``tvm.schedule.Buffer`` whose leading
     extent equals ``n`` and that the first two entries of ``sp`` equal ``v``
     and ``w`` (all three names come from the enclosing scope).
     """
     src_buf = ins[0]
     assert isinstance(src_buf, tvm.schedule.Buffer)
     assert src_buf.shape[0] == n
     assert sp[0] == v
     assert sp[1] == w
     return tvm.call_packed("hw_func", src_buf.data, outs[0].data, sp[0],
                            sp[1])
예제 #10
0
def test_min_repeat_ms():
    """Check that ``min_repeat_ms`` makes ``time_evaluator`` run more calls.

    Every call of the compiled function appends one character to a log
    file, so the length of the file's first line counts the calls made.
    """
    tmp = tempdir()
    log_path = tmp.relpath("log")

    @tvm.register_func
    def my_debug(filename):
        """one call lasts for 100 ms and writes one character to a file"""
        time.sleep(0.1)
        with open(filename, "a") as fout:
            fout.write("c")

    def calls_logged():
        with open(log_path, "r") as fin:
            return len(fin.readline())

    X = tvm.compute((), lambda: tvm.call_packed("my_debug", log_path))
    sched = tvm.create_schedule(X.op)
    func = tvm.build(sched, [X])
    x = tvm.nd.empty((), dtype="int32")

    # Without a minimum duration the test expects exactly two logged calls.
    quick_timer = func.time_evaluator(
        func.entry_name, tvm.cpu(), number=1, repeat=1)
    quick_timer(x)
    assert calls_logged() == 2

    # With a 1000 ms minimum and 100 ms per call, more than ten further
    # calls must be logged on top of the two from the first run.
    long_timer = func.time_evaluator(
        func.entry_name, tvm.cpu(), number=1, repeat=1, min_repeat_ms=1000)
    long_timer(x)
    assert calls_logged() > 10 + 2
예제 #11
0
def test_stack_vm_basic():
    """Check that a packed callback receives the buffer's runtime shape.

    The lowered function evaluates one packed call that passes the symbolic
    extent of its buffer argument; the callback asserts that it equals the
    length of the array actually supplied.
    """
    arr = tvm.nd.array(np.zeros(10, dtype='float32'))

    @tvm.register_func
    def tvm_call_back_get_shape(shape0):
        print(shape0)
        assert shape0 == arr.shape[0]

    extent = tvm.var('n')
    buf = tvm.decl_buffer((extent, ), tvm.float32)
    shape_call = tvm.call_packed("tvm_call_back_get_shape", buf.shape[0])
    body = tvm.make.Evaluate(shape_call)
    lowered = tvm.ir_pass.MakeAPI(body, "print_shape", [buf], 0, True)
    lowered = tvm.ir_pass.LowerTVMBuiltin(lowered)
    run_jit(lowered, lambda f: f(arr))
def test_stack_vm_basic():
    """Verify that the symbolic buffer extent reaches a packed callback.

    A single packed call forwards ``Ab.shape[0]`` to a registered Python
    function, which asserts it matches the length of the input array.
    """
    host_array = tvm.nd.array(np.zeros(10, dtype='float32'))

    @tvm.register_func
    def tvm_call_back_get_shape(shape0):
        print(shape0)
        assert shape0 == host_array.shape[0]

    size = tvm.var('n')
    Ab = tvm.decl_buffer((size, ), tvm.float32)
    callback = tvm.call_packed("tvm_call_back_get_shape", Ab.shape[0])
    api_func = tvm.ir_pass.LowerTVMBuiltin(
        tvm.ir_pass.MakeAPI(
            tvm.make.Evaluate(callback), "print_shape", [Ab], 0, True))

    def invoke(f):
        return f(host_array)

    run_jit(api_func, invoke)
예제 #13
0
파일: extern_op.py 프로젝트: ryansoq/nixtvm
def lesson1():
    """Tutorial step: combine an extern CBLAS matmul with a TVM compute stage.

    ``C`` is an extern op calling the packed function
    "tvm.contrib.cblas.matmul"; ``D`` adds a scalar bias to it.  The function
    is built for "llvm", run on random inputs and checked against NumPy.
    """
    ######################################################################
    # Use Extern Tensor Function
    # --------------------------
    # :any:`tvm.extern` adds a call to an external array function.  Its
    # first argument declares the shape of the output tensor and its second
    # argument lists the input tensors.
    #
    # The third argument is a callback that receives symbolic placeholders
    # for the inputs and for the outputs and returns the statement to run.
    #
    # Here that statement calls a registered tvm function which invokes
    # CBLAS.  TVM treats the extern function as a black box, but schedulable
    # TVM stages, such as the bias addition below, can still be mixed in.
    #
    rows, inner, cols = 1024, 128, 235
    bias = tvm.var('bias', dtype=tvm.float32)
    A = tvm.placeholder((rows, inner), name='A')
    B = tvm.placeholder((inner, cols), name='B')

    def cblas_matmul(ins, outs):
        return tvm.call_packed("tvm.contrib.cblas.matmul", ins[0], ins[1],
                               outs[0], False, False)

    C = tvm.extern((rows, cols), [A, B], cblas_matmul, name="C")
    D = tvm.compute(C.shape, lambda i, j: C[i, j] + bias, name="D")
    sched = tvm.create_schedule(D.op)
    ######################################################################
    # Verify the Result
    # -----------------
    # Run the built function and compare its output with NumPy.
    #
    ctx = tvm.cpu(0)
    func = tvm.build(sched, [A, B, D, bias], "llvm")
    a = tvm.nd.array(
        np.random.uniform(size=(rows, inner)).astype(A.dtype), ctx)
    b = tvm.nd.array(
        np.random.uniform(size=(inner, cols)).astype(B.dtype), ctx)
    d = tvm.nd.array(np.zeros((rows, cols), dtype=D.dtype), ctx)
    bb = 10.0
    func(a, b, d, bb)
    actual = d.asnumpy()
    expected = np.dot(a.asnumpy(), b.asnumpy()) + 10
    np.testing.assert_allclose(actual, expected, rtol=1e-5)
예제 #14
0
def trace1():
    """Demonstrate passing a whole buffer to a packed Python callback.

    Every element of the output is computed by calling the registered
    function "my_debug" with the input buffer; the callback prints the array
    and returns 0.  The lowered IR and the output array are printed.
    """
    @tvm.register_func
    def my_debug(x):
        print("array=", x.asnumpy())
        return 0

    inp = tvm.placeholder((4, ), name="x", dtype="int32")
    inp_buffer = tvm.decl_buffer(inp.shape, dtype=inp.dtype)

    traced = tvm.compute(
        inp.shape, lambda i: tvm.call_packed("my_debug", inp_buffer))
    sched = tvm.create_schedule(traced.op)

    lowered = tvm.lower(sched, [inp, traced],
                        binds={inp: inp_buffer},
                        simple_mode=True)
    print(lowered)

    func = tvm.build(sched, [inp_buffer, traced], binds={inp: inp_buffer})
    inp_nd = tvm.nd.array(np.ones((4, ), dtype=inp.dtype))
    out_nd = tvm.nd.array(np.zeros((4, ), dtype=traced.dtype))
    func(inp_nd, out_nd)
    print(out_nd)
예제 #15
0
def test_stack_vm_loop():
    """Check a lowered loop that turns a zero buffer into a ramp.

    The loop runs ``i`` over ``[0, n - 1)`` and stores ``A[i] + 1`` into
    ``A[i + 1]``, also emitting a packed call to "tvm_stack_vm_print" each
    iteration.  Starting from zeros, the buffer must end up equal to
    ``np.arange(len)``.
    """
    dtype = 'int64'
    n = tvm.var('n')
    Ab = tvm.decl_buffer((n, ), dtype)

    ib = tvm.ir_builder.create()
    A = ib.buffer_ptr(Ab)
    # The loop variable is supplied by for_range; no separate tvm.var is
    # needed (the original created one that was immediately rebound here).
    with ib.for_range(0, n - 1, "i") as i:
        A[i + 1] = A[i] + 1
        ib.emit(tvm.call_packed("tvm_stack_vm_print", i))

    stmt = ib.get()
    fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
    fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
    a = tvm.nd.array(np.zeros(10, dtype=dtype))

    def check(f):
        f(a)
        np.testing.assert_equal(a.asnumpy(), np.arange(a.shape[0]))

    run_jit(fapi, check)
예제 #16
0
def test_static_init():
    """Check that a static handle reaches a packed callback as a void pointer.

    The emitted statement calls "test_static_callback" with the result of
    the "tvm_static_handle" intrinsic and the buffer; the callback asserts
    that the handle arrives as a ``ctypes.c_void_p``.
    """
    dtype = 'int64'
    n = tvm.var('n')
    Ab = tvm.decl_buffer((n, ), dtype)
    # The original also created an unused ``i = tvm.var('i')``; it is dropped.
    ib = tvm.ir_builder.create()
    handle = tvm.call_intrin("handle", "tvm_static_handle")
    ib.emit(tvm.call_packed("test_static_callback", handle, Ab))

    # Registered after the call is emitted but before the module is built
    # and run below.
    @tvm.register_func("test_static_callback")
    def test_cb(sh, A):
        assert isinstance(sh, ctypes.c_void_p)
        return sh

    stmt = ib.get()
    fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
    fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
    f = tvm.codegen.build_module(fapi, "llvm")
    a = tvm.nd.array(np.zeros(10, dtype=dtype))
    f(a)
예제 #17
0
def test_static_init():
    """Check that a static handle is delivered to a packed callback.

    A packed call to "test_static_callback" receives the value of the
    "tvm_static_handle" intrinsic together with the buffer; the callback
    asserts that the handle is a ``ctypes.c_void_p`` and returns it.
    """
    dtype = 'int64'
    n = tvm.var('n')
    Ab = tvm.decl_buffer((n, ), dtype)
    # No loop is built here, so the unused ``i = tvm.var('i')`` of the
    # original is omitted.
    ib = tvm.ir_builder.create()
    handle = tvm.call_intrin("handle", "tvm_static_handle")
    ib.emit(
        tvm.call_packed("test_static_callback", handle, Ab))

    # Registered after the call is emitted but before the module is built
    # and run below.
    @tvm.register_func("test_static_callback")
    def test_cb(sh, A):
        assert isinstance(sh, ctypes.c_void_p)
        return sh

    stmt = ib.get()
    fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
    fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
    f = tvm.codegen.build_module(fapi, "llvm")
    a = tvm.nd.array(np.zeros(10, dtype=dtype))
    f(a)
예제 #18
0
def test_min_repeat_ms():
    """Check that ``min_repeat_ms`` increases the number of timed calls.

    Each call of the compiled function appends one character to a log file;
    the length of the file's first line is used as the call counter.
    """
    tmp = tempdir()
    log_path = tmp.relpath("log")

    @tvm.register_func
    def my_debug(filename):
        """one call lasts for 100 ms and writes one character to a file"""
        time.sleep(0.1)
        # Re-read the argument's ``value`` through a C string pointer before
        # using it as the path to open.
        filename = ctypes.c_char_p(filename.value).value
        with open(filename, "a") as fout:
            fout.write("c")

    def calls_logged():
        with open(log_path, "r") as fin:
            return len(fin.readline())

    X = tvm.compute((), lambda: tvm.call_packed("my_debug", log_path))
    sched = tvm.create_schedule(X.op)
    func = tvm.build(sched, [X])
    x = tvm.nd.empty((), dtype="int32")

    # Without a minimum duration the test expects exactly two logged calls.
    quick_timer = func.time_evaluator(
        func.entry_name, tvm.cpu(), number=1, repeat=1)
    quick_timer(x)
    assert calls_logged() == 2

    # With a 1000 ms minimum and 100 ms per call, more than ten further
    # calls must be logged on top of the two from the first run.
    long_timer = func.time_evaluator(
        func.entry_name, tvm.cpu(), number=1, repeat=1, min_repeat_ms=1000)
    long_timer(x)
    assert calls_logged() > 10 + 2
예제 #19
0
def test_stack_vm_loop():
    """Check a lowered loop that fills a zero buffer with a ramp.

    For ``i`` in ``[0, n - 1)`` the loop stores ``A[i] + 1`` into
    ``A[i + 1]`` and emits a packed call to "tvm_stack_vm_print".  Starting
    from zeros, the buffer must end up equal to ``np.arange(len)``.
    """
    dtype = 'int64'
    n = tvm.size_var('n')
    Ab = tvm.decl_buffer((n, ), dtype)

    ib = tvm.ir_builder.create()
    A = ib.buffer_ptr(Ab)
    # for_range provides the loop variable; the separate size_var('i') of the
    # original was rebound here before ever being used, so it is removed.
    with ib.for_range(0, n - 1, "i") as i:
        A[i + 1] = A[i] + 1
        ib.emit(tvm.call_packed("tvm_stack_vm_print", i))

    stmt = ib.get()
    fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
    fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
    a = tvm.nd.array(np.zeros(10, dtype=dtype))

    def check(f):
        f(a)
        np.testing.assert_equal(a.asnumpy(), np.arange(a.shape[0]))

    run_jit(fapi, check)
예제 #20
0
def main():
    """Build, run and verify an extern CBLAS matmul followed by a bias add.

    ``C`` is an extern op that calls the packed function
    "tvm.contrib.cblas.matmul" on ``A`` and ``B``; ``D`` adds the scalar
    ``bias`` to every element of ``C``.  The function is built for "llvm",
    executed on random inputs, and its output is compared with NumPy.
    """
    ctx = tvm.cpu(0)
    n = 1024
    l = 128
    m = 235
    bias = tvm.var('bias', dtype=tvm.float32)
    A = tvm.placeholder((n, l), name='A')
    B = tvm.placeholder((l, m), name='B')
    C = tvm.extern(
        (n, m), [A, B],
        lambda ins, outs: tvm.call_packed("tvm.contrib.cblas.matmul", ins[0],
                                          ins[1], outs[0], False, False),
        name="C")
    D = tvm.compute(C.shape, lambda i, j: C(i, j) + bias, name="D")
    s = tvm.create_schedule(D.op)
    f = tvm.build(s, [A, B, D, bias], "llvm")
    a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
    b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
    d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)
    bb = 10.0
    # Execute the compiled function.  The original never called it, so ``d``
    # stayed all zeros and the comparison below could not succeed.
    f(a, b, d, bb)
    print(d.asnumpy())
    tvm.testing.assert_allclose(d.asnumpy(),
                                np.dot(a.asnumpy(), b.asnumpy()) + bb,
                                rtol=1e-5)
예제 #21
0
 def extern_generator(ins, outs):
     """Return the packed call to "my_extern_array_func2" for the extern op.

     The call receives the first input and the first output.
     """
     source, target = ins[0], outs[0]
     return tvm.call_packed("my_extern_array_func2", source, target)
예제 #22
0
 def intrin_func(ins, outs):
     """Return the packed call to "op" on the single input and first output.

     ``ins`` must contain exactly one element.
     """
     (src,) = ins
     dst = outs[0]
     stmt = tvm.call_packed("op", src, dst)
     return stmt
예제 #23
0
 def intrin_func(ins, outs):
     """Return an argument-less packed call to "multivadd".

     Neither ``ins`` nor ``outs`` is used.
     """
     stmt = tvm.call_packed("multivadd")
     return stmt
예제 #24
0
 def intrin_func(ins, outs):
     """Return the packed call to "op" on the first input and first output."""
     source, target = ins[0], outs[0]
     return tvm.call_packed("op", source, target)
예제 #25
0
 def intrin_func(ins, outs):
     """Emit the "multivadd" packed call; ``ins`` and ``outs`` are ignored."""
     multivadd_call = tvm.call_packed("multivadd")
     return multivadd_call
예제 #26
0
 def intrin_func(ins, outs):
     """Return the packed call to "vadd" on both inputs and the first output.

     ``ins`` must contain exactly two elements.
     """
     lhs, rhs = ins
     result = outs[0]
     stmt = tvm.call_packed("vadd", lhs, rhs, result)
     return stmt
예제 #27
0
파일: nms.py 프로젝트: LANHUIYING/tvm
def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1):
    """Non-maximum suppression operator for object detection.

    Scores are extracted from ``data``, ranked with the packed function
    "tvm.contrib.sort.argsort", and the ranking is handed to ``nms_ir``
    together with the data and the valid counts.

    Parameters
    ----------
    data: tvm.Tensor
        3-D tensor with shape [batch_size, num_anchors, 6].
        The last dimension should be in format of
        [class_id, score, box_left, box_top, box_right, box_bottom].

    valid_count : tvm.Tensor
        1-D tensor for valid number of boxes.

    nms_threshold : float
        Non-maximum suppression threshold.

    force_suppress : boolean
        Whether to suppress all detections regardless of class_id.

    nms_topk : int
        Keep maximum top k detections before nms, -1 for no limit.

    Returns
    -------
    out : tvm.Tensor
        3-D tensor with shape [batch_size, num_anchors, 6].

    Example
    --------
    .. code-block:: python

        # An example to use nms
        dshape = (1, 5, 6)
        data = tvm.placeholder(dshape, name="data")
        valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count")
        nms_threshold = 0.7
        force_suppress = True
        nms_topk = -1
        out = nms(data, valid_count, nms_threshold, force_suppress, nms_topk)
        np_data = np.random.uniform(size=dshape).astype("float32")
        np_valid_count = np.array([4]).astype("int32")
        s = topi.generic.schedule_nms(out)
        f = tvm.build(s, [data, valid_count, out], "llvm")
        ctx = tvm.cpu()
        tvm_data = tvm.nd.array(np_data, ctx)
        tvm_valid_count = tvm.nd.array(np_valid_count, ctx)
        tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), ctx)
        f(tvm_data, tvm_valid_count, tvm_out)
    """
    batch_size, num_anchors = data.shape[0], data.shape[1]
    score_shape = (batch_size, num_anchors)

    count_buffer = api.decl_buffer(valid_count.shape, "int32",
                                   "valid_count_buf", data_alignment=4)
    box_buffer = api.decl_buffer(data.shape, data.dtype, "data_buf",
                                 data_alignment=8)

    # The score sits at position 1 of the last dimension, and the resulting
    # (batch, anchor) score tensor is sorted along its axis 1.
    score_index = 1
    sort_axis = 1
    score_tensor = tvm.compute(score_shape,
                               lambda i, j: data[i, j, score_index])
    score_buffer = api.decl_buffer(score_tensor.shape, data.dtype,
                                   "score_tensor_buf", data_alignment=8)
    index_dtype = "int32"
    index_buffer = api.decl_buffer(score_shape, index_dtype,
                                   "sort_tensor_buf", data_alignment=8)

    def emit_sort(ins, outs):
        return tvm.call_packed("tvm.contrib.sort.argsort", ins[0], ins[1],
                               outs[0], sort_axis, True)

    sort_tensor = tvm.extern(score_shape,
                             [score_tensor, valid_count],
                             emit_sort,
                             dtype=index_dtype,
                             in_buffers=[score_buffer, count_buffer],
                             out_buffers=index_buffer,
                             name="nms_sort")

    def emit_nms(ins, outs):
        return nms_ir(ins[0], ins[1], ins[2], outs[0], nms_threshold,
                      force_suppress, nms_topk)

    return tvm.extern(data.shape,
                      [data, sort_tensor, valid_count],
                      emit_nms,
                      dtype="float32",
                      in_buffers=[box_buffer, index_buffer, count_buffer],
                      tag="nms")
예제 #28
0
 def intrin_func(ins, outs):
     """Check the first input buffer and emit the "vadd" packed call.

     Asserts that the first input is a ``tvm.schedule.Buffer`` whose leading
     extent has ``value`` equal to ``n`` (from the enclosing scope).  The
     call receives the input data, the output data and that extent.
     """
     src = ins[0]
     assert isinstance(src, tvm.schedule.Buffer)
     length = src.shape[0]
     assert length.value == n
     return tvm.call_packed("vadd", src.data, outs[0].data, length)
예제 #29
0
파일: extern_op.py 프로젝트: bddppq/tvm
# The callback passed to tvm.extern receives a list of symbolic placeholders
# for the inputs and a list for the outputs, and returns the statement to run.
#
# Here that statement calls a registered TVM function which invokes CBLAS.
# TVM treats the extern array function as a black box, but schedulable TVM
# stages, such as the bias addition below, can still be combined with it.
#
n = 1024
l = 128
m = 235
bias = tvm.var('bias', dtype=tvm.float32)
A = tvm.placeholder((n, l), name='A')
B = tvm.placeholder((l, m), name='B')
C = tvm.extern(
    (n, m),
    [A, B],
    lambda ins, outs: tvm.call_packed(
        "tvm.contrib.cblas.matmul", ins[0], ins[1], outs[0], False, False),
    name="C")
D = tvm.compute(C.shape, lambda i, j: C[i, j] + bias, name="D")
s = tvm.create_schedule(D.op)

######################################################################
# Verify the Result
# -----------------
# Build the function and allocate the arrays needed to check the result
# against the expected value.
#
ctx = tvm.cpu(0)
f = tvm.build(s, [A, B, D, bias], "llvm")
a = tvm.nd.array(
    np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
b = tvm.nd.array(
    np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
d = tvm.nd.array(
    np.zeros((n, m), dtype=D.dtype), ctx)
bb = 10.0
예제 #30
0
 def extern_generator(ins, outs):
     """Return the packed call to "my_extern_array_func1" for the extern op.

     The call receives the first input and the first output.
     """
     source, target = ins[0], outs[0]
     return tvm.call_packed("my_extern_array_func1", source, target)
예제 #31
0
# NOTE(review): `f`, `A`, `B`, `D`, `n`, `l`, `m` and `ctx` are not defined in
# this fragment and are assumed to come from earlier code.  The original kept
# a commented-out alternative build call here:
# f = tvm.build(s, [A, B, bias,D], 'llvm')
a = tvm.nd.array(
    np.random.uniform(size=(n, l)).astype(A.dtype), ctx=ctx)
b = tvm.nd.array(
    np.random.uniform(size=(l, m)).astype(B.dtype), ctx=ctx)
d = tvm.nd.array(
    np.zeros(shape=(n, m), dtype=D.dtype), ctx=ctx)
bb = 10.0
f(a, b, d, bb)
# The expected output is the NumPy matrix product plus the bias value.
np.testing.assert_allclose(
    d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 10, rtol=1e-5)
print(d.shape)


@tvm.register_func('tvm.contrib.my_tvm_add_one')
def my_tvm_add_one(x, y):
    """Write ``x + 1`` into ``y`` and print the types of both arguments.

    Registered as the packed function 'tvm.contrib.my_tvm_add_one'.
    """
    print('my tvm add one signatures :%s, %s' % (type(x), type(y)))
    incremented = tvm.nd.array(x.asnumpy() + 1)
    incremented.copyto(y)


# Call the Python function registered above through an extern op and check
# that it adds one to every element of the input.
A = tvm.placeholder((n, ), name='A')
B = tvm.extern(
    A.shape,
    [A],
    lambda ins, outs: tvm.call_packed(
        'tvm.contrib.my_tvm_add_one', ins[0], outs[0]),
    name='C')
s = tvm.create_schedule(B.op)
f = tvm.build(s, [A, B], 'llvm')
a = tvm.nd.array(
    np.random.uniform(size=(n, )).astype(A.dtype), ctx=ctx)
b = tvm.nd.array(
    np.random.uniform(size=(n, )).astype(B.dtype), ctx=ctx)
f(a, b)
np.testing.assert_allclose(
    b.asnumpy(), a.asnumpy() + 1, rtol=1e-5)
print(b.shape)
예제 #32
0
 def intrin_func(ins, outs):
     """Emit the "op" packed call for the one input and the first output.

     ``ins`` must hold exactly one element.
     """
     [operand] = ins
     return tvm.call_packed("op", operand, outs[0])
예제 #33
0
 def intrin_func(ins, outs):
     """Validate the first input buffer, then emit the "vadd" packed call.

     The first input must be a ``tvm.schedule.Buffer`` whose leading extent
     has ``value`` equal to ``n`` (from the enclosing scope); both
     conditions are asserted.
     """
     in_buf = ins[0]
     assert isinstance(in_buf, tvm.schedule.Buffer)
     extent = in_buf.shape[0]
     assert extent.value == n
     out_data = outs[0].data
     return tvm.call_packed("vadd", in_buf.data, out_data, extent)
예제 #34
0
 def intrin_func(ins, outs):
     """Emit the "vadd" packed call for the two inputs and the first output.

     ``ins`` must hold exactly two elements.
     """
     first, second = ins
     return tvm.call_packed("vadd", first, second, outs[0])
예제 #35
0
def argsort(data, valid_count, axis=-1, is_ascend=1, dtype="float32", flag=0):
    """Performs sorting along the given axis and returns an array
    of indices having the same shape as an input array that index
    data in sorted order.

    When ``flag`` is truthy the packed function
    "tvm.contrib.sort.argsort_nms" is called with ``valid_count`` and the
    output dtype is always "int32".  Otherwise "tvm.contrib.sort.argsort" is
    called without ``valid_count`` and the output uses ``dtype``.

    Parameters
    ----------
    data : tvm.Tensor
        The input tensor.

    valid_count : tvm.Tensor
        1-D tensor for valid number of boxes only for ssd.
        Only used when ``flag`` is truthy.

    axis : optional, int
        Axis along which to sort the input tensor. Defaults to -1.

    is_ascend : optional, boolean
        Whether to sort in ascending or descending order.

    dtype : optional, string
        DType of the output indices. Ignored when ``flag`` is truthy.

    flag : optional, boolean
        Whether valid_count is valid.

    Returns
    -------
    out : tvm.Tensor
        Sorted index tensor.

    Example
    --------
    .. code-block:: python

        # An example to use argsort
        dshape = (1, 5, 6)
        data = tvm.placeholder(dshape, name="data")
        valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count")
        axis = 0
        is_ascend = False
        flag = False
        out = argsort(data, valid_count, axis, is_ascend, flag=flag)
        np_data = np.random.uniform(size=dshape).astype(data.dtype)
        np_valid_count = np.array([4]).astype("int32")
        s = topi.generic.schedule_argsort(out)
        f = tvm.build(s, [data, valid_count, out], "llvm")
        ctx = tvm.cpu()
        tvm_data = tvm.nd.array(np_data, ctx)
        tvm_valid_count = tvm.nd.array(np_valid_count, ctx)
        tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), ctx)
        f(tvm_data, tvm_valid_count, tvm_out)
    """
    input_buffer = api.decl_buffer(data.shape, data.dtype, "data_buf",
                                   data_alignment=8)

    if not flag:
        # Plain argsort: the data is the only input.
        index_buffer = api.decl_buffer(data.shape, dtype, "out_buf",
                                       data_alignment=8)

        def emit_argsort(ins, outs):
            return tvm.call_packed("tvm.contrib.sort.argsort", ins[0],
                                   outs[0], axis, is_ascend)

        return tvm.extern(data.shape,
                          [data],
                          emit_argsort,
                          dtype=dtype,
                          in_buffers=[input_buffer],
                          out_buffers=index_buffer,
                          name="argsort_cpu",
                          tag="argsort_cpu")

    # NMS variant: valid_count is passed as a second input.
    count_buffer = api.decl_buffer(valid_count.shape, valid_count.dtype,
                                   "valid_count_buf", data_alignment=4)
    index_buffer = api.decl_buffer(data.shape, "int32", "out_buf",
                                   data_alignment=8)

    def emit_argsort_nms(ins, outs):
        return tvm.call_packed("tvm.contrib.sort.argsort_nms", ins[0],
                               ins[1], outs[0], axis, is_ascend)

    return tvm.extern(data.shape,
                      [data, valid_count],
                      emit_argsort_nms,
                      dtype="int32",
                      in_buffers=[input_buffer, count_buffer],
                      out_buffers=index_buffer,
                      name="argsort_nms_cpu",
                      tag="argsort_nms_cpu")
예제 #36
0
 def extern_func(ins, outs):
     """Emit the "myadd" packed call for one input and two outputs.

     Asserts that the first input is a ``tvm.schedule.Buffer``.  ``m`` comes
     from the enclosing scope.
     """
     src = ins[0]
     assert isinstance(src, tvm.schedule.Buffer)
     first_out, second_out = outs[0], outs[1]
     return tvm.call_packed("myadd", src.data, first_out.data,
                            second_out.data, m)
예제 #37
0
# The callback handed to tvm.extern takes a list of symbolic placeholders for
# the inputs and a list for the outputs, and returns the statement to execute.
#
# In this case the statement calls a registered tvm function which invokes
# CBLAS.  TVM treats the extern array function as a black box, yet
# schedulable TVM stages, like the bias addition below, can be mixed with it.
#
n = 1024
l = 128
m = 235
bias = tvm.var('bias', dtype=tvm.float32)
A = tvm.placeholder((n, l), name='A')
B = tvm.placeholder((l, m), name='B')
C = tvm.extern(
    (n, m),
    [A, B],
    lambda ins, outs: tvm.call_packed(
        "tvm.contrib.cblas.matmul", ins[0], ins[1], outs[0], False, False),
    name="C")
D = tvm.compute(C.shape, lambda i, j: C[i, j] + bias, name="D")
s = tvm.create_schedule(D.op)

######################################################################
# Verify the Result
# -----------------
# Build the function and allocate the arrays needed to check the result
# against the expected value.
#
ctx = tvm.cpu(0)
f = tvm.build(s, [A, B, D, bias], "llvm")
a = tvm.nd.array(
    np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
b = tvm.nd.array(
    np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
d = tvm.nd.array(
    np.zeros((n, m), dtype=D.dtype), ctx)
bb = 10.0
예제 #38
0
def nms(data,
        valid_count,
        nms_threshold=0.5,
        force_suppress=False,
        nms_topk=-1):
    """Non-maximum suppression operator for object detection.

    The scores are pulled out of ``data``, ranked by the packed function
    "tvm.contrib.sort.argsort", and the ranking is passed to ``nms_ir``
    along with the data and the valid counts.

    Parameters
    ----------
    data: tvm.Tensor
        3-D tensor with shape [batch_size, num_anchors, 6].
        The last dimension should be in format of
        [class_id, score, box_left, box_top, box_right, box_bottom].

    valid_count : tvm.Tensor
        1-D tensor for valid number of boxes.

    nms_threshold : float
        Non-maximum suppression threshold.

    force_suppress : boolean
        Whether to suppress all detections regardless of class_id.

    nms_topk : int
        Keep maximum top k detections before nms, -1 for no limit.

    Returns
    -------
    out : tvm.Tensor
        3-D tensor with shape [batch_size, num_anchors, 6].

    Example
    --------
    .. code-block:: python

        # An example to use nms
        dshape = (1, 5, 6)
        data = tvm.placeholder(dshape, name="data")
        valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count")
        nms_threshold = 0.7
        force_suppress = True
        nms_topk = -1
        out = nms(data, valid_count, nms_threshold, force_suppress, nms_topk)
        np_data = np.random.uniform(size=dshape).astype("float32")
        np_valid_count = np.array([4]).astype("int32")
        s = topi.generic.schedule_nms(out)
        f = tvm.build(s, [data, valid_count, out], "llvm")
        ctx = tvm.cpu()
        tvm_data = tvm.nd.array(np_data, ctx)
        tvm_valid_count = tvm.nd.array(np_valid_count, ctx)
        tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), ctx)
        f(tvm_data, tvm_valid_count, tvm_out)
    """
    score_shape = (data.shape[0], data.shape[1])
    counts_buf = api.decl_buffer(
        valid_count.shape, "int32", "valid_count_buf", data_alignment=4)
    boxes_buf = api.decl_buffer(
        data.shape, data.dtype, "data_buf", data_alignment=8)

    # The value 1 is both the position of the score in the last dimension of
    # ``data`` and the axis of the score tensor that is sorted.
    score_axis = 1
    scores = tvm.compute(score_shape, lambda i, j: data[i, j, score_axis])
    scores_buf = api.decl_buffer(
        scores.shape, data.dtype, "score_tensor_buf", data_alignment=8)
    order_dtype = "int32"
    order_buf = api.decl_buffer(
        score_shape, order_dtype, "sort_tensor_buf", data_alignment=8)

    # Rank the scores in descending order (last packed argument is True).
    order = tvm.extern(
        score_shape,
        [scores, valid_count],
        lambda ins, outs: tvm.call_packed(
            "tvm.contrib.sort.argsort", ins[0], ins[1], outs[0], score_axis,
            True),
        dtype=order_dtype,
        in_buffers=[scores_buf, counts_buf],
        out_buffers=order_buf,
        name="nms_sort")

    # Hand the data, the ranking and the counts to the NMS IR generator.
    suppressed = tvm.extern(
        data.shape,
        [data, order, valid_count],
        lambda ins, outs: nms_ir(
            ins[0], ins[1], ins[2], outs[0], nms_threshold, force_suppress,
            nms_topk),
        dtype="float32",
        in_buffers=[boxes_buf, order_buf, counts_buf],
        tag="nms")
    return suppressed
예제 #39
0
 def extern_func(ins, outs):
     """Return the "myadd" packed call on one input and two output buffers.

     The first input is asserted to be a ``tvm.schedule.Buffer``; ``m`` is
     taken from the enclosing scope.
     """
     in_buf = ins[0]
     assert isinstance(in_buf, tvm.schedule.Buffer)
     call_args = (in_buf.data, outs[0].data, outs[1].data, m)
     return tvm.call_packed("myadd", *call_args)
예제 #40
0
파일: nms.py 프로젝트: zhyj3038/tvm
def non_max_suppression(data,
                        valid_count,
                        max_output_size=-1,
                        iou_threshold=0.5,
                        force_suppress=False,
                        top_k=-1,
                        id_index=0,
                        return_indices=True,
                        invalid_to_bottom=False):
    """Run non-maximum suppression over a batch of detection boxes.

    The score of every box is read from index 1 of the last axis of
    ``data``, the scores are ranked with the ``tvm.contrib.sort.argsort``
    packed function, and that ranking is handed to ``hybrid_nms`` together
    with the remaining options.

    Parameters
    ----------
    data : tvm.Tensor
        3-D tensor with shape [batch_size, num_anchors, 6].
        Each row of the last dimension is laid out as
        [class_id, score, box_left, box_top, box_right, box_bottom].

    valid_count : tvm.Tensor
        1-D tensor holding the number of valid boxes.

    max_output_size : optional, int
        Upper bound on the number of valid boxes kept per instance.
        The default of -1 keeps every valid box.

    iou_threshold : optional, float
        Overlap threshold used for suppression.

    force_suppress : optional, boolean
        If set, suppress detections regardless of their class_id.

    top_k : optional, int
        Number of top detections to keep before nms, -1 for no limit.

    id_index : optional, int
        Index of the class id inside a box row, -1 to disable.

    return_indices : optional, boolean
        If set (the default), return the box indices instead of the boxes.

    invalid_to_bottom : optional, boolean
        Whether to move all valid bounding boxes to the top. Only consulted
        when ``return_indices`` is false.

    Returns
    -------
    out : tvm.Tensor
        With ``return_indices`` set, the box index tensor produced by
        ``hybrid_nms``. Otherwise a 3-D tensor with shape
        [batch_size, num_anchors, 6], passed through
        ``hybrid_rearrange_out`` first when ``invalid_to_bottom`` is set.

    Example
    --------
    .. code-block:: python

        # Box output (return_indices=False) so the result matches dshape.
        dshape = (1, 5, 6)
        data = tvm.placeholder(dshape, name="data")
        valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count")
        iou_threshold = 0.7
        force_suppress = True
        top_k = -1
        out = non_max_suppression(data, valid_count, iou_threshold=iou_threshold,
                                  force_suppress=force_suppress, top_k=top_k,
                                  return_indices=False)
        np_data = np.random.uniform(size=dshape).astype(data.dtype)
        np_valid_count = np.array([4]).astype(valid_count.dtype)
        s = topi.generic.schedule_nms(out)
        f = tvm.build(s, [data, valid_count, out], "llvm")
        ctx = tvm.cpu()
        tvm_data = tvm.nd.array(np_data, ctx)
        tvm_valid_count = tvm.nd.array(np_valid_count, ctx)
        tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), ctx)
        f(tvm_data, tvm_valid_count, tvm_out)
    """
    # Both the valid-count buffer and the argsort result use this dtype.
    index_dtype = "int32"
    score_axis = 1
    score_shape = (data.shape[0], data.shape[1])

    def _emit_argsort(ins, outs):
        # Same argument order as the original packed call; the trailing
        # True is the final flag handed to the argsort packed function.
        return tvm.call_packed(
            "tvm.contrib.sort.argsort", ins[0], ins[1],
            outs[0], score_axis, True)

    # Slice the score column out of data: one value per (batch, anchor).
    score_tensor = tvm.compute(score_shape,
                               lambda i, j: data[i, j, score_axis])

    # Explicit buffers for the extern op's inputs and output.
    valid_count_buf = api.decl_buffer(
        valid_count.shape, index_dtype, "valid_count_buf", data_alignment=4)
    score_tensor_buf = api.decl_buffer(
        score_tensor.shape, data.dtype, "score_tensor_buf", data_alignment=8)
    sort_tensor_buf = api.decl_buffer(
        score_shape, index_dtype, "sort_tensor_buf", data_alignment=8)

    sort_tensor = tvm.extern(
        score_shape,
        [score_tensor, valid_count],
        _emit_argsort,
        dtype=index_dtype,
        in_buffers=[score_tensor_buf, valid_count_buf],
        out_buffers=sort_tensor_buf,
        name="nms_sort")

    # Scalar options are wrapped as typed constants before the hybrid call.
    max_output_size_const = tvm.const(max_output_size, dtype="int32")
    iou_threshold_const = tvm.const(iou_threshold, dtype="float32")
    force_suppress_const = tvm.const(force_suppress, dtype="bool")
    top_k_const = tvm.const(top_k, dtype="int32")
    id_index_const = tvm.const(id_index, dtype="int32")
    out, box_indices = hybrid_nms(data, sort_tensor, valid_count,
                                  max_output_size_const,
                                  iou_threshold_const,
                                  force_suppress_const,
                                  top_k_const,
                                  id_index_const)

    if return_indices:
        return box_indices
    if invalid_to_bottom:
        out = hybrid_rearrange_out(out)
    return out
예제 #41
0
import tvm
import numpy as np


@tvm.register_func
def my_debug(x):
    """Print the contents of ``x`` as a NumPy array and return 0.

    The function is registered through ``tvm.register_func``; the script
    further down refers to it by the string ``"my_debug"`` in a packed call.
    """
    as_numpy = x.asnumpy()
    print("array=", as_numpy)
    return 0


# Demo script: call the registered "my_debug" function from inside a compute
# so that the contents of an input buffer can be printed at run time.

# 4-element int32 input, plus an explicitly declared buffer with the same
# shape and dtype that will stand in for it.
x = tvm.placeholder((4, ), name="x", dtype="int32")
xbuffer = tvm.decl_buffer(x.shape, dtype=x.dtype)

# Every element of y is defined as the result of the packed call to
# "my_debug" with xbuffer as its argument (the index i itself is unused).
y = tvm.compute(x.shape, lambda i: tvm.call_packed("my_debug", xbuffer))
s = tvm.create_schedule(y.op)

# Show the lowered form, with x bound to xbuffer.
print(tvm.lower(s, [x, y], binds={x: xbuffer}, simple_mode=True))

# Build with the same binding. Note that the argument list here names
# xbuffer directly, whereas the tvm.lower call above lists x.
f = tvm.build(s, [xbuffer, y], binds={x: xbuffer})
# Input filled with ones, output pre-filled with zeros.
xnd = tvm.nd.array(np.ones((4, ), dtype=x.dtype))
ynd = tvm.nd.array(np.zeros((4, ), dtype=y.dtype))

# Running the built function is what triggers the "my_debug" call(s).
f(xnd, ynd)