def _tensor_bop_impl(lhs, rhs):
    """Overloaded {op} operator.

    If both operands are non-zero-rank Tensors, it performs
    tensor-tensor {op} operation, and broadcasts inputs when necessary.

    If one operand is non-zero-rank Tensor, while the other operand is
    scalar like type (e.g., numeric types, Expr, or TensorSlice), it
    performs tensor-scalar {op} operation on an element-wise basis.

    Otherwise, it performs default generic.{op} operation, as defined
    in tvm.generic module.

    Parameters
    ----------
    lhs : object
        Left operand.
    rhs : object
        Right operand.

    Returns
    -------
    ret : tvm.Tensor (if at least one operand is non-zero-rank Tensor)
          tvm.Expr (otherwise)
        The result of {op} operation.
    """
    # NOTE: the literal "{op}" placeholders above are kept because the
    # enclosing code formats this docstring with the concrete operator name.

    def _rank_of(value):
        """Rank of a Tensor, 0 for scalar-like values, -1 otherwise."""
        if isinstance(value, tvm.tensor.Tensor):
            return len(value.shape)
        if isinstance(value, (int, float, tvm.expr.Expr, tvm.tensor.TensorSlice)):
            return 0
        return -1

    lhs_rank = _rank_of(lhs)
    rhs_rank = _rank_of(rhs)

    # Unknown operand type on either side, or two scalars: defer to the
    # generic implementation.
    if lhs_rank < 0 or rhs_rank < 0 or lhs_rank == rhs_rank == 0:
        return orig_bop(lhs, rhs)

    # Two non-zero-rank tensors: broadcasting tensor-tensor op.
    if lhs_rank > 0 and rhs_rank > 0:
        return broadcast_bop(lhs, rhs)

    # Exactly one side is scalar-like: element-wise op over the tensor side.
    if lhs_rank == 0:
        body = lambda *idx: elementwise_bop(lhs, rhs(*idx))
        shape = rhs.shape
    else:
        body = lambda *idx: elementwise_bop(lhs(*idx), rhs)
        shape = lhs.shape
    with tvm.tag_scope(tag=tag.ELEMWISE):
        return tvm.compute(shape, body, "tensor_" + name)
def conv2d_winograd_nnpack_ww(cfg, data, transformed_kernel, bias, strides,
                              padding, dilation, layout, out_dtype):
    """TOPI compute callback. Use winograd NNPACK template.

    Parameters
    ----------
    cfg : ConfigEntity
        AutoTVM config; supplies the 'winograd_nnpack_algorithm' knob and
        receives the effective FLOP count.
    data : tvm.Tensor
        Input in NCHW layout (batch must be 1).
    transformed_kernel : tvm.Tensor
        4-D kernel already transformed for the winograd algorithm.
    bias : tvm.Tensor
        Bias tensor passed through to NNPACK.
    strides, padding, dilation : int or tuple
        Standard conv2d attributes; only stride 1, pad 1, dilation 1 are
        supported (3x3 winograd).
    layout : str
        Must be 'NCHW'.
    out_dtype : str
        Unused here; NNPACK determines the output type.

    Returns
    -------
    output : tvm.Tensor
        The convolution result.
    """
    N, CI, IH, IW = get_const_tuple(data.shape)
    if isinstance(dilation, int):
        dilation_h = dilation_w = dilation
    else:
        dilation_h, dilation_w = dilation
    assert (dilation_h, dilation_w) == (1, 1)
    assert len(transformed_kernel.shape) == 4
    CO, _, _, _ = get_const_tuple(transformed_kernel.shape)
    HSTR, WSTR = strides if isinstance(strides, (tuple, list)) else (strides, strides)
    KH, KW = 3, 3
    # Unpack all four pads (top, left, bottom, right) rather than assuming
    # symmetric padding — consistent with the other NNPACK winograd callbacks.
    pt, pl, pb, pr = get_pad_tuple(padding, (KH, KW))
    assert layout == 'NCHW'
    assert KH == 3 and KW == 3 and pt == 1 and pb == 1 and pl == 1 and pr == 1 \
        and HSTR == 1 and WSTR == 1
    H = (IH + pt + pb - 3) // HSTR + 1
    W = (IW + pl + pr - 3) // WSTR + 1

    assert N == 1
    with tvm.tag_scope("winograd_nnpack_conv2d_output"):
        output = tvm.contrib.nnpack.convolution_inference_without_weight_transform(
            data=data,
            transformed_kernel=transformed_kernel,
            bias=bias,
            padding=[pt, pb, pl, pr],
            stride=[HSTR, WSTR],
            algorithm=cfg['winograd_nnpack_algorithm'].val)

    # We have to manually assign the effective GFLOP for winograd: report the
    # FLOPs of the equivalent direct convolution so AutoTVM compares fairly.
    cfg.add_flop(2 * N * CI * H * W * KH * KW * CO)
    return output
def test_with():
    """Check that tag_scope tags a compute op and that attrs survive
    JSON round-tripping."""
    size_n = tvm.var('n')
    size_m = tvm.var('m')
    size_l = tvm.var('l')
    A = tvm.placeholder((size_n, size_l), name='A')
    B = tvm.placeholder((size_m, size_l), name='B')

    with tvm.tag_scope(tag="gemm"):
        k = tvm.reduce_axis((0, size_l), name='k')
        C = tvm.compute(
            (size_n, size_m),
            lambda i, j: tvm.sum(A[i, k] * B[j, k], axis=k),
            attrs={"hello": 1, "arr": [10, 12]})

    # Tag and attrs are attached to the op.
    assert C.op.tag == 'gemm'
    assert "hello" in C.op.attrs
    assert "xx" not in C.op.attrs
    assert C.op.attrs["hello"].value == 1

    # Attrs survive save/load through JSON.
    CC = tvm.load_json(tvm.save_json(C))
    assert CC.op.attrs["hello"].value == 1
    assert CC.op.attrs["arr"][0].value == 10
    # str format happened to be json compatible
    assert json.loads(str(CC.op.attrs))["arr"][1] == 12
def conv2d_nchw_winograd_nnpack_without_weight_transform(
        cfg, data, transformed_kernel, bias, strides, padding, dilation, out_dtype):
    """Compute conv2d_nchw using NNPack winograd without weight transform"""
    batch, in_ch, in_h, in_w = get_const_tuple(data.shape)

    # Only unit dilation is supported by the winograd path.
    if isinstance(dilation, int):
        dil_h = dil_w = dilation
    else:
        dil_h, dil_w = dilation
    assert (dil_h, dil_w) == (1, 1)

    assert len(transformed_kernel.shape) == 4
    out_ch, _, _, _ = get_const_tuple(transformed_kernel.shape)

    if isinstance(strides, (tuple, list)):
        stride_h, stride_w = strides
    else:
        stride_h = stride_w = strides

    k_h, k_w = 3, 3
    pad_t, pad_l, pad_b, pad_r = get_pad_tuple(padding, (k_h, k_w))
    # Winograd here is fixed to 3x3 kernels with pad 1 and stride 1.
    assert k_h == 3 and k_w == 3 and pad_t == 1 and pad_b == 1 and pad_l == 1 \
        and pad_r == 1 and stride_h == 1 and stride_w == 1
    out_h = (in_h + pad_t + pad_b - 3) // stride_h + 1
    out_w = (in_w + pad_l + pad_r - 3) // stride_w + 1

    assert batch == 1
    with tvm.tag_scope("winograd_nnpack_conv2d_output"):
        output = tvm.contrib.nnpack.convolution_inference_without_weight_transform(
            data=data,
            transformed_kernel=transformed_kernel,
            bias=bias,
            padding=[pad_t, pad_b, pad_l, pad_r],
            stride=[stride_h, stride_w],
            algorithm=cfg['winograd_nnpack_algorithm'].val)

    # we have to manually assign effective GFLOP for winograd
    cfg.add_flop(2 * batch * in_ch * out_h * out_w * k_h * k_w * out_ch)
    return output
def conv2d_arm_cpu_winograd_nnpack(cfg, data, kernel, strides, padding, dilation,
                                   layout, out_dtype, convolution_algorithm):
    """ TOPI compute callback. Use winograd NNPACK template """
    batch, in_ch, in_h, in_w = get_const_tuple(data.shape)

    # Winograd supports unit dilation only.
    if isinstance(dilation, int):
        dil_h = dil_w = dilation
    else:
        dil_h, dil_w = dilation
    assert (dil_h, dil_w) == (1, 1)

    assert len(kernel.shape) == 4
    out_ch, _, k_h, k_w = get_const_tuple(kernel.shape)

    if isinstance(strides, (tuple, list)):
        stride_h, stride_w = strides
    else:
        stride_h = stride_w = strides

    pad_t, pad_l, pad_b, pad_r = get_pad_tuple(padding, (k_h, k_w))
    assert layout == 'NCHW'
    # Fixed to 3x3 kernels with pad 1 and stride 1.
    assert k_h == 3 and k_w == 3 and pad_t == 1 and pad_b == 1 and pad_l == 1 \
        and pad_r == 1 and stride_h == 1 and stride_w == 1
    out_h = (in_h + pad_t + pad_b - 3) // stride_h + 1
    out_w = (in_w + pad_l + pad_r - 3) // stride_w + 1

    # Expose the chosen NNPACK algorithm as a (single-valued) tuning knob.
    cfg.define_knob('winograd_nnpack_algorithm', [convolution_algorithm])

    assert batch == 1
    with tvm.tag_scope("winograd_nnpack_conv2d_weight_transform"):
        transformed_kernel = tvm.contrib.nnpack.convolution_inference_weight_transform(
            kernel, algorithm=cfg['winograd_nnpack_algorithm'].val)
        if autotvm.GLOBAL_SCOPE.in_tuning:
            # During tuning the real transform result is irrelevant; use a
            # dummy tensor to skip the expensive transform.
            transformed_kernel = tvm.compute(transformed_kernel.shape,
                                             lambda *args: 0.0)

    with tvm.tag_scope("winograd_nnpack_conv2d_output"):
        output = tvm.contrib.nnpack.convolution_inference_without_weight_transform(
            data, transformed_kernel,
            bias=None,
            padding=[pad_t, pad_b, pad_l, pad_r],
            stride=[stride_h, stride_w],
            algorithm=cfg['winograd_nnpack_algorithm'].val)

    # we have to manually assign effective GFLOP for winograd
    cfg.add_flop(2 * batch * in_ch * out_h * out_w * k_h * k_w * out_ch)
    return output
def test_with():
    """A compute created inside a tag_scope picks up the scope's tag."""
    dim_n, dim_m, dim_l = tvm.var('n'), tvm.var('m'), tvm.var('l')
    A = tvm.placeholder((dim_n, dim_l), name='A')
    B = tvm.placeholder((dim_m, dim_l), name='B')
    with tvm.tag_scope(tag="gemm"):
        k = tvm.reduce_axis((0, dim_l), name='k')
        C = tvm.compute((dim_n, dim_m),
                        lambda i, j: tvm.sum(A[i, k] * B[j, k], axis=k))
    assert C.op.tag == 'gemm'
def compute_clip(attrs, inputs, _):
    """ Clip operator. """
    data = inputs[0]
    # Clamp bounds as constants of the input dtype.
    lower = tvm.const(attrs.get_float("a_min"), data.dtype)
    upper = tvm.const(attrs.get_float("a_max"), data.dtype)
    with tvm.tag_scope(topi.tag.ELEMWISE):
        # Two element-wise stages: clip from above, then from below.
        capped = tvm.compute(data.shape,
                             lambda *idx: tvm.min(data(*idx), upper),
                             name="clipA")
        clipped = tvm.compute(capped.shape,
                              lambda *idx: tvm.max(capped(*idx), lower),
                              name="clipB")
    return clipped
def compute_clip(attrs, inputs, output_type, target):
    """ Clip operator. """
    data = inputs[0]
    # Clamp bounds as constants of the input dtype.
    lower = tvm.const(attrs.a_min, data.dtype)
    upper = tvm.const(attrs.a_max, data.dtype)
    with tvm.tag_scope(topi.tag.ELEMWISE):
        # Two element-wise stages: clip from above, then from below.
        capped = tvm.compute(data.shape,
                             lambda *idx: tvm.min(data(*idx), upper),
                             name="clipA")
        clipped = tvm.compute(capped.shape,
                              lambda *idx: tvm.max(capped(*idx), lower),
                              name="clipB")
    return [clipped]
def test_nested():
    """Nesting a tag_scope inside another tagged compute must raise ValueError."""
    n, c, h, w = tvm.var('n'), tvm.var('c'), tvm.var('h'), tvm.var('w')
    kh, kw = tvm.var('kh'), tvm.var('kw')
    A = tvm.placeholder((n, c, h, w), name='A')
    B = tvm.placeholder((c, c, kh, kw), name='B')
    raised = False
    try:
        with tvm.tag_scope(tag='conv'):
            compute_conv(A, B)
    except ValueError:
        raised = True
    assert raised
def test_with():
    """tag_scope attaches tag and attrs; attrs survive a JSON round trip."""
    rows, cols, depth = tvm.var('n'), tvm.var('m'), tvm.var('l')
    A = tvm.placeholder((rows, depth), name='A')
    B = tvm.placeholder((cols, depth), name='B')
    with tvm.tag_scope(tag="gemm"):
        k = tvm.reduce_axis((0, depth), name='k')
        C = tvm.compute(
            (rows, cols),
            lambda i, j: tvm.sum(A[i, k] * B[j, k], axis=k),
            attrs={"hello": 1, "arr": [10, 12]})

    assert C.op.tag == 'gemm'
    assert "hello" in C.op.attrs
    assert "xx" not in C.op.attrs
    assert C.op.attrs["hello"].value == 1

    round_tripped = tvm.load_json(tvm.save_json(C))
    assert round_tripped.op.attrs["hello"].value == 1
    assert round_tripped.op.attrs["arr"][0].value == 10
    # str format happened to be json compatible
    assert json.loads(str(round_tripped.op.attrs))["arr"][1] == 12