Example #1
    def _bilinear_sample(n, c, h, w):
        y, x = _compute_source_index(n, h, w)
        y0 = te.floor(y).astype("int32")
        x0 = te.floor(x).astype("int32")
        y1 = y0 + tir.const(1, "int32")
        x1 = x0 + tir.const(1, "int32")

        return (
            _get_pixel_value(n, c, y0, x0) * (1.0 - (y - y0)) * (1.0 - (x - x0))
            + _get_pixel_value(n, c, y0, x1) * (1.0 - (y - y0)) * (x - x0)
            + _get_pixel_value(n, c, y1, x0) * (y - y0) * (1.0 - (x - x0))
            + _get_pixel_value(n, c, y1, x1) * (y - y0) * (x - x0)
        )
Example #2
def _bilinear_sample(n, c, h, w):
    x = grid[n, 0, h, w]
    y = grid[n, 1, h, w]
    y = (y + 1) * (in_height - 1) / 2
    x = (x + 1) * (in_width - 1) / 2
    x0 = te.floor(x).astype('int32')
    y0 = te.floor(y).astype('int32')
    x1 = x0 + tir.const(1, 'int32')
    y1 = y0 + tir.const(1, 'int32')
    return _get_pixel_value(n, c, y0, x0) * (1.0 - (y - y0)) * (1.0 - (x - x0)) \
        + _get_pixel_value(n, c, y0, x1) * (1.0 - (y - y0)) * (x - x0) \
        + _get_pixel_value(n, c, y1, x0) * (y - y0) * (1.0 - (x - x0)) \
        + _get_pixel_value(n, c, y1, x1) * (y - y0) * (x - x0)
Example #3
def common_reduce(name, args=(0,)):
  if not isinstance(args, (tuple, list)):
    args = (args, )
  def reduce_op(x, y):
    assert x.dtype == y.dtype, "Reducing elements that don't have the same data type: %s vs. %s" % (x.dtype, y.dtype)
    return tir.call_pure_extern(x.dtype, name, x, y, *args[1:])
  return te.comm_reducer(reduce_op, lambda t: tir.const(args[0], dtype=t), name=name)
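
A minimal usage sketch of common_reduce (hypothetical: assumes an extern "max" is available on the target and that te/tir are imported as above; the identity value is illustrative, not from the original source):

# Build a float32 max-reduction the same way te.sum is built from a comm_reducer.
maximum = common_reduce("max", args=(-3.4e38,))  # args[0] is the identity element
A = te.placeholder((64,), name="A", dtype="float32")
k = te.reduce_axis((0, 64), "k")
B = te.compute((), lambda *idx: maximum(A[k], axis=k), name="B")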
Example #4
def test_ret_const():
    a = tir.const(0)
    b = tir.ret(a)
    b = tir.Evaluate(b)
    func = tir.PrimFunc([], b)
    func = build_tir_func(func)
    out = func()
    assert out == 0
Example #5
def test_underflow():
    int_min = tir.const(-(1 << 31), 'int32')
    constant_fold_res = (int_min - 1).value
    int_min_np = np.int32(-(1 << 31))
    res_np = int_min_np - np.int32(1)

    print(constant_fold_res, res_np)

    assert constant_fold_res == res_np
Example #6
    def _trilinear_sample(n, c, d, h, w):
        z, y, x = _compute_source_index(n, d, h, w)
        z0 = te.floor(z).astype("int32")
        y0 = te.floor(y).astype("int32")
        x0 = te.floor(x).astype("int32")
        z1 = z0 + tir.const(1, "int32")
        y1 = y0 + tir.const(1, "int32")
        x1 = x0 + tir.const(1, "int32")

        return (
            _get_pixel_value(n, c, z0, y0, x0) * (1 - (x - x0)) * (1 - (y - y0)) * (1 - (z - z0))
            + _get_pixel_value(n, c, z0, y0, x1) * (x - x0) * (1 - (y - y0)) * (1 - (z - z0))
            + _get_pixel_value(n, c, z1, y1, x0) * (1 - (x - x0)) * (y - y0) * (z - z0)
            + _get_pixel_value(n, c, z1, y1, x1) * (x - x0) * (y - y0) * (z - z0)
            + _get_pixel_value(n, c, z0, y1, x0) * (1 - (x - x0)) * (y - y0) * (1 - (z - z0))
            + _get_pixel_value(n, c, z1, y0, x1) * (x - x0) * (1 - (y - y0)) * (z - z0)
            + _get_pixel_value(n, c, z1, y0, x0) * (1 - (x - x0)) * (1 - (y - y0)) * (z - z0)
            + _get_pixel_value(n, c, z0, y1, x1) * (x - x0) * (y - y0) * (1 - (z - z0))
        )
Example #7
def test_reduce():
    def check(m, target_bits, target_dtype):
        A = te.placeholder((m, ), name="A", dtype="float32")
        k = te.reduce_axis((0, m), "k")
        B = te.compute((), lambda *idx: te.sum(A[k], axis=k), name="B")
        s = te.create_schedule(B.op)
        stmt = lower_sch(s, [A, B], target_bits)
        assert stmt[1].loop_var.dtype == target_dtype

    # i32 -> i32
    check(const(64, dtype="int32"), 32, "int32")
    # i64 -> i32
    check(const(64, dtype="int64"), 32, "int32")
    # i32 -> i16
    check(const(64, dtype="int32"), 16, "int16")
    check(const(2**16, dtype="int32"), 16, "int32")
    # symbolic
    check(te.var("n", dtype="int32"), 32, "int32")
    check(te.var("n", dtype="int64"), 32, "int64")
Example #8
def test_reduce():
    def check(m, target_bits, target_dtype):
        A = te.placeholder((m, ), name='A', dtype='float32')
        k = te.reduce_axis((0, m), "k")
        B = te.compute((), lambda *idx: te.sum(A[k], axis=k), name='B')
        s = te.create_schedule(B.op)
        stmt = lower_sch(s, [A, B], target_bits)
        assert stmt.body[1].loop_var.dtype == target_dtype

    # i32 -> i32
    check(const(64, dtype='int32'), 32, 'int32')
    # i64 -> i32
    check(const(64, dtype='int64'), 32, 'int32')
    # i32 -> i16
    check(const(64, dtype='int32'), 16, 'int16')
    check(const(2**16, dtype='int32'), 16, 'int32')
    # symbolic
    check(te.var('n', dtype='int32'), 32, 'int32')
    check(te.var('n', dtype='int64'), 32, 'int64')
Example #9
    def gen_ir(
        data_ptr,
        n_fft,
        hop_length,
        win_length,
        window_ptr,
        normalized,
        onesided,
        output_ptr,
        loop_kind,
    ):
        ib = tir.ir_builder.create()
        data = ib.buffer_ptr(data_ptr)
        window = ib.buffer_ptr(window_ptr)
        output = ib.buffer_ptr(output_ptr)
        # https://librosa.org/doc/0.7.2/_modules/librosa/core/spectrum.html#stft
        with ib.for_range(0,
                          output_ptr.shape[0] * output_ptr.shape[1],
                          kind="parallel") as batch_row:
            with ib.for_range(0, output_ptr.shape[2], kind=loop_kind) as col:
                batch = ib.allocate("int32", (1), name="batch", scope="local")
                row = ib.allocate("int32", (1), name="row", scope="local")
                batch = tir.floordiv(batch_row, output_ptr.shape[1])
                row = tir.floormod(batch_row, output_ptr.shape[1])
                output[batch, row, col, 0] = tir.Cast(data_ptr.dtype, 0)
                output[batch, row, col, 1] = tir.Cast(data_ptr.dtype, 0)
                with ib.for_range(0, win_length) as wlen:
                    output[batch, row, col,
                           0] += (window[wlen] *
                                  data[batch, col * hop_length + wlen] *
                                  tir.cos(2 * pi * row * wlen / win_length))
                    output[batch, row, col,
                           1] -= (window[wlen] *
                                  data[batch, col * hop_length + wlen] *
                                  tir.sin(2 * pi * row * wlen / win_length))
                with ib.if_scope(normalized):
                    output[batch, row, col,
                           0] /= tir.sqrt(tir.const(n_fft, "float32"))
                    output[batch, row, col,
                           1] /= tir.sqrt(tir.const(n_fft, "float32"))

        return ib.get()
Example #10
    def _check(variables, formulas, coef=(-5, 5), bounds=(-20, 20)):
        vs = [te.var("x" + str(i)) for i in range(variables)]

        fs = []
        for i in range(formulas):
            s1 = sum([v * random.randint(coef[0], coef[1]) for v in vs])
            s1 += random.randint(coef[0], coef[1])
            s2 = sum([v * random.randint(coef[0], coef[1]) for v in vs])
            s2 += random.randint(coef[0], coef[1])
            op = random.choice([tir.expr.EQ, tir.expr.LE, tir.expr.LT, tir.expr.GE, tir.expr.GT])
            fs.append(op(s1, s2))

        vranges = {v: tvm.ir.expr.Range(bounds[0], bounds[1] + 1) for v in vs}
        before = te.all(tir.const(1, "bool"), *fs)
        after = arith._ffi_api.SolveInequalitiesAsCondition(vs, vranges, fs)
        after = te.all(tir.const(1, "bool"), *after)
        testing.check_bool_expr_is_true(before == after, vranges)

        solution = arith.solve_linear_inequalities(fs, vs, vranges, deskew_range=True)
        testing.check_int_constraints_trans_consistency(solution)
Example #11
def test_convert_ssa():
    zero = tir.const(0)
    nop = tir.Evaluate(zero)
    v = tir.Var("i1", "int32")
    for_stmt = tir.For(v, zero, zero, tir.ForKind.SERIAL, nop)
    load = tir.Evaluate(tir.Load("int32", v, zero))
    seq = tir.SeqStmt([for_stmt, for_stmt, load])
    func = tir.PrimFunc([], seq)
    mod = tvm.IRModule({"main": func})
    mod = tir.transform.InjectVirtualThread()(
        mod
    )  # Use pass InjectVirtualThread to invoke ConvertSSA
Example #12
def test_relay_take():
    engine = relay.backend.compile_engine.get()

    def check(shape, index, target_bits, target_dtype):
        x = relay.var("x", shape=shape)
        y = relay.op.take(x, indices=index)
        func = relay.Function([x], y)
        mod = tvm.IRModule.from_expr(func)
        func = mod["main"]
        z = engine.lower(func, "llvm")
        stmt = lower_sch(z.schedule, tuple(z.inputs) + tuple(z.outputs), 32)
        assert stmt.value.index.dtype == target_dtype

    check((const(2**16, 'int64'), const(2**15 + 1, 'int64')),
          relay.const(0, dtype="int64"),
          target_bits=32,
          target_dtype="int32")
    check((const(2**16, 'int64'), const(2**15 + 1, 'int64')),
          relay.const(2**31, dtype="int64"),
          target_bits=32,
          target_dtype="int64")
Example #13
def test_relay_basic():
    engine = relay.backend.compile_engine.get()

    def check(shapex, shapey, target_bits, target_dtype):
        x = relay.var("x", shape=shapex)
        y = relay.var("y", shape=shapey)
        z = relay.add(x, y)
        func = relay.Function([x, y], z)
        mod = tvm.IRModule.from_expr(func)
        mod = relay.transform.InferType()(mod)
        func = mod["main"]
        z = engine.lower(func, "llvm")
        stmt = lower_sch(z.schedule, tuple(z.inputs) + tuple(z.outputs), 32)
        # outer loop
        assert stmt.loop_var.dtype == target_dtype
        # inner loop
        if len(shapex) > 1 or len(shapey) > 1:
            assert stmt.body.loop_var.dtype == target_dtype

    check(
        (const(2**16, "int64"), const(2**15 + 1, "int64")),
        (1, const(2**15 + 1, "int64")),
        target_bits=32,
        target_dtype="int64",
    )
    check(
        (const(2**16, "int64"), const(2**15, "int64")),
        (1, const(2**15, "int64")),
        target_bits=32,
        target_dtype="int32",
    )
    check((const(2**31, "int64"), ), (const(2**31, "int64"), ),
          target_bits=32,
          target_dtype="int32")
    check(
        (const(2**31 + 1, "int64"), ),
        (const(2**31 + 1, "int64"), ),
        target_bits=32,
        target_dtype="int64",
    )
Example #14
def test_multilanes():
    def check(m, lanes, target_bits, target_dtype):
        ib = tvm.tir.ir_builder.create()
        Ab = tvm.tir.decl_buffer((m, ),
                                 dtype="float32x{}".format(lanes),
                                 name="A")
        A = ib.buffer_ptr(Ab)
        Bb = tvm.tir.decl_buffer((m, ),
                                 dtype="float32x{}".format(lanes),
                                 name="B")
        B = ib.buffer_ptr(Bb)
        with ib.for_range(0, m, name="i", dtype=m.dtype) as i:
            B[i] = A[i] + 1
        stmt = ib.get()
        stmt = lower_stmt([Ab, Bb], stmt, target_bits)
        assert stmt.loop_var.dtype == target_dtype

    # i32 -> i32
    check(const(2**10, dtype="int32"), 2, target_bits=32, target_dtype="int32")
    check(const(2**32, dtype="int32"), 2, target_bits=32, target_dtype="int32")
    # i64 -> i32
    check(const(2**10, dtype="int64"), 2, target_bits=32, target_dtype="int32")
    check(const(2**32, dtype="int64"), 2, target_bits=32, target_dtype="int64")
    # i32 -> i16
    check(const(2**10, dtype="int32"), 2, target_bits=16, target_dtype="int16")
    check(const(2**16, dtype="int32"), 2, target_bits=16, target_dtype="int32")
Example #15
def test_thread_axis():
    def check(m, n, target_bits, target_dtype):
        ib = tvm.tir.ir_builder.create()
        Ab = tvm.tir.decl_buffer((m, n), name="A")
        A = ib.buffer_ptr(Ab)
        Bb = tvm.tir.decl_buffer((m, n), name="B")
        B = ib.buffer_ptr(Bb)
        bx = te.thread_axis("blockIdx.x")
        tx = te.thread_axis("threadIdx.x")
        ib.scope_attr(bx, "thread_extent", m)
        ib.scope_attr(tx, "thread_extent", n)
        B[bx * n + tx] = A[bx * n + tx] + 1
        stmt = ib.get()
        stmt = lower_stmt([Ab, Bb], stmt, target_bits)
        assert stmt.node.var.dtype == target_dtype
        assert stmt.body.node.var.dtype == target_dtype

    # i32 -> i32
    check(2, 32, target_bits=32, target_dtype="int32")
    check(
        2**30,
        32,  # i32 + i32 is not promoted to i64 even in the case of overflow
        target_bits=32,
        target_dtype="int32",
    )
    # i64 -> i32
    check(const(2, dtype="int64"),
          const(32, dtype="int64"),
          target_bits=32,
          target_dtype="int32")
    check(
        const(2**30, dtype="int64"),
        const(32, dtype="int64"),
        target_bits=32,
        target_dtype="int64",
    )
    # i32 -> i16
    check(2, 32, target_bits=16, target_dtype="int16")
    check(2**14, 32, target_bits=16, target_dtype="int32")
Example #16
    def _get_pixel_value(n, c, h, w):
        if padding_mode == "zeros":
            return te.if_then_else(
                te.all(h >= 0, w >= 0, h < in_height, w < in_width),
                data[n, c, h, w],
                tir.const(0.0, dtype=data.dtype),
            )
        if padding_mode == "border":
            h_b = te.max(te.min(h, in_height - 1), 0)
            w_b = te.max(te.min(w, in_width - 1), 0)
            return data[n, c, h_b, w_b]

        raise AssertionError("unsupported padding_mode")
Example #17
def affine_grid(data, target_shape):
    """affine_grid operator that generates 2D sampling grid.

    This operation is described in https://arxiv.org/pdf/1506.02025.pdf. It generates a uniform
    sampling grid within the target shape and normalizes it to [-1, 1]. The provided affine
    transformation is then applied on the sampling grid.

    Parameters
    ----------
    data : tvm.Tensor
        3-D with shape [batch, 2, 3]. The affine matrix.

    target_shape: list/tuple of two int
        Specifies the output shape (H, W).

    Returns
    -------
    Output : tvm.Tensor
        4-D with shape [batch, 2, target_height, target_width]
    """
    assert target_shape is not None
    assert len(target_shape) == 2
    assert (
        target_shape[0] > 1 and target_shape[1] > 1
    ), "target height/width should be greater than 1"

    dtype = data.dtype
    y_step = tir.const((2.0 - 1e-7) / (target_shape[0] - 1), dtype=dtype)
    x_step = tir.const((2.0 - 1e-7) / (target_shape[1] - 1), dtype=dtype)
    start = tir.const(-1.0, dtype=dtype)

    def _compute(n, dim, i, j):
        y = start + i * y_step
        x = start + j * x_step
        return data[n, dim, 0] * x + data[n, dim, 1] * y + data[n, dim, 2]

    oshape = (data.shape[0], len(target_shape), *target_shape)
    return te.compute(oshape, _compute, tag="affine_grid")
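
A minimal usage sketch of affine_grid (the identity transform and shapes below are illustrative assumptions, using the classic te.create_schedule / tvm.build flow; not part of the original source):

import numpy as np
import tvm
from tvm import te

data = te.placeholder((1, 2, 3), name="data", dtype="float32")  # batch of 2x3 affine matrices
grid = affine_grid(data, (8, 8))                                # 4-D output: (1, 2, 8, 8)
s = te.create_schedule(grid.op)
f = tvm.build(s, [data, grid], "llvm")
ident = np.array([[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]], dtype="float32")
out = tvm.nd.array(np.zeros((1, 2, 8, 8), dtype="float32"))
f(tvm.nd.array(ident), out)  # out now holds the normalized sampling grid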
Example #18
def test_convert_ssa():
    dtype = "int32"
    zero = tir.const(0)
    nop = tir.Evaluate(zero)
    var_type = ir.PointerType(ir.PrimType(dtype))
    v = tir.Var("i1", var_type)
    buf = tir.decl_buffer([16], dtype=dtype, data=v)
    let = tir.LetStmt(v, v, nop)
    load = tir.Evaluate(tir.BufferLoad(buf, [zero]))
    seq = tir.SeqStmt([let, let, load])
    func = tir.PrimFunc([], seq)
    mod = tvm.IRModule({"main": func})
    mod = tir.transform.InjectVirtualThread()(
        mod)  # Use pass InjectVirtualThread to invoke ConvertSSA
Example #19
def _dilate(*indices):
    not_zero = []
    index_tuple = []
    for i in range(n):
        if strides[i] != 1:
            index_tuple.append(idx_div(indices[i], strides[i]))
            not_zero.append(idx_mod(indices[i], strides[i]).equal(0))
        else:
            index_tuple.append(indices[i])
    if not_zero:
        not_zero = te.all(*not_zero)
        return te.if_then_else(not_zero, padded(*index_tuple),
                               tir.const(0.0, padded.dtype))
    return padded(*index_tuple)
Example #20
    def _check_forward(constraints1, constraints2, varmap, backvarmap):
        ana = tvm.arith.Analyzer()
        all_vranges = vranges.copy()
        all_vranges.update({v: r for v, r in constraints1.ranges.items()})

        # Check that the transformation is injective
        cond_on_vars = tir.const(1, 'bool')
        for v in constraints1.variables:
            # variable mapping is consistent
            v_back = ana.simplify(tir.stmt_functor.substitute(varmap[v], backvarmap))
            cond_on_vars = te.all(cond_on_vars, v == v_back)
        # Also we have to check that the new relations are true when old relations are true
        cond_subst = tir.stmt_functor.substitute(
            te.all(tir.const(1, 'bool'), *constraints2.relations), backvarmap)
        # We have to include relations from vranges too
        for v in constraints2.variables:
            if v in constraints2.ranges:
                r = constraints2.ranges[v]
                range_cond = te.all(v >= r.min, v < r.min + r.extent)
                range_cond = tir.stmt_functor.substitute(range_cond, backvarmap)
                cond_subst = te.all(cond_subst, range_cond)
        cond_subst = ana.simplify(cond_subst)
        check_bruteforce(te.all(cond_subst, cond_on_vars), all_vranges,
                         cond=te.all(tir.const(1, 'bool'), *constraints1.relations))
Example #21
    result : int or float or bool or str
        result of evaluation,
        or "Runtime Exception" if there was an
        error during evaluation
    """
    try:
        return expr()
    except Exception:
        traceback.print_exc()
        return "Runtime Exception"


def compare_results(result_one, result_two):
    if (isinstance(result_one, (float, np.float32))
            or isinstance(result_two, (float, np.float32))):
        if np.isnan(result_one) and np.isnan(result_two):
            return True
        elif np.isinf(result_one) and np.isinf(result_two):
            if result_one > 0 and result_two > 0:
                return True
            elif result_one < 0 and result_two < 0:
                return True
            return False
        return np.isclose(result_one, result_two)
    return result_one == result_two


if __name__ == "__main__":
    a = te.var(name="a", dtype='int32')
    assert evaluate_tvm_expr(tir.const(5) + a, [a], {"a": 5}) == 10
Example #22
def _get_pixel_value(n, c, h, w):
    return te.if_then_else(
        te.all(h >= 0, w >= 0, h < in_height, w < in_width),
        data[n, c, h, w],
        tir.const(0.0, dtype=data.dtype),
    )
Example #23
# 2/14/2020
# Bug description: Bitshifting by a float is the identity function
# PR: https://github.com/apache/incubator-tvm/pull/4892

import tvm
from tvm import tir, te
import numpy as np
import sys

hadError = False

try:

    shape = (1, 1)
    a = tir.const(dtype='int32', value=10)
    c = te.compute(
        shape, lambda i, j: a << 2.0
    )  # this should either be impossible or materialize as a * (2 ** 2.0)
    s = tvm.create_schedule([c.op])
    f = tvm.build(s, [c])
    c_tvm = tvm.nd.array(np.zeros(shape, dtype='float32'))
    f(c_tvm)
    print(c_tvm)
    assert False
except tvm.TVMError:
    pass
Example #24
# 2/15/2020
# Bug description: And'ing by a float causes codegen crash
# PR: https://github.com/apache/incubator-tvm/pull/4892


import tvm
from tvm import tir, te
import numpy as np
import sys

try:
    shape = (5, 5)

    a = tir.const(dtype='float32', value=10)

    c = te.compute(shape, lambda i, j: a & 1.5 ^ a | ~a)
    # Also affects |, &, ^, ~

    s = tvm.create_schedule([c.op])

    f = tvm.build(s, [c])

    c_tvm = tvm.nd.array(np.zeros(shape, dtype='float32'))
    f(c_tvm)
    print(c_tvm)
    assert False
except tvm.TVMError:
    pass