def verify_broadcast_binary_ele(lhs_shape, rhs_shape, typ="add"):
    # Build the logic and compile the function
    A = tvm.placeholder(shape=lhs_shape, name="A")
    B = tvm.placeholder(shape=rhs_shape, name="B")
    if typ == "add":
        C = topi.broadcast_add(A, B)
    elif typ == "sub":
        C = topi.broadcast_sub(A, B)
    elif typ == "div":
        C = topi.broadcast_div(A, B)
    elif typ == "mul":
        C = topi.broadcast_mul(A, B)
    elif typ == "maximum":
        C = topi.broadcast_maximum(A, B)
    elif typ == "minimum":
        C = topi.broadcast_minimum(A, B)
    else:
        raise NotImplementedError

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(C)
        ctx = tvm.context(device, 0)
        foo = tvm.build(s, [A, B, C], device, name="broadcast_binary" + "_" + typ)
        lhs_npy = np.random.uniform(size=lhs_shape).astype(A.dtype)
        rhs_npy = np.random.uniform(size=rhs_shape).astype(A.dtype)
        if typ == "add":
            out_npy = lhs_npy + rhs_npy
        elif typ == "sub":
            out_npy = lhs_npy - rhs_npy
        elif typ == "div":
            rhs_npy = np.abs(rhs_npy) + 0.001
            out_npy = lhs_npy / rhs_npy
        elif typ == "mul":
            out_npy = lhs_npy * rhs_npy
        elif typ == "maximum":
            out_npy = np.maximum(lhs_npy, rhs_npy)
        elif typ == "minimum":
            out_npy = np.minimum(lhs_npy, rhs_npy)
        else:
            raise NotImplementedError
        lhs_nd = tvm.nd.array(lhs_npy, ctx)
        rhs_nd = tvm.nd.array(rhs_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), ctx)
        for _ in range(1):
            foo(lhs_nd, rhs_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)

    check_device("opencl")
    check_device("cuda")
    check_device("metal")
    check_device("rocm")

def compute_conv2d(attrs, inputs, _):
    """Compute definition of conv2d"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    layout = attrs["layout"]
    assert layout == "NCHW" or layout == "NHWC"
    (dilation_h, dilation_w) = dilation
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be positive value")
    elif dilation == (1, 1):
        kernel = inputs[1]
    elif layout == "NCHW":
        kernel = topi.nn.dilate(inputs[1], [1, 1, dilation_h, dilation_w])
    else:  # layout == NHWC
        kernel = topi.nn.dilate(inputs[1], [1, dilation_h, dilation_w, 1])

    if groups == 1:
        out = topi.nn.conv2d(inputs[0], kernel, strides, padding, layout)
    elif groups == get_const_int(inputs[0].shape[1]) and groups == channels:
        out = topi.nn.depthwise_conv2d_nchw(inputs[0], kernel, strides, padding)
    else:
        raise ValueError("not support arbitrary group number for now")
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        expand_axis = 1 if layout == "NCHW" else 0
        bias = topi.expand_dims(bias, axis=expand_axis, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    return out

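# Note on the bias handling above: topi.expand_dims(bias, axis=1, num_newaxis=2) turns a
# per-channel bias of shape (C,) into (C, 1, 1), which topi.broadcast_add then broadcasts
# against the NCHW conv output (N, C, H, W); for NHWC the axis-0 expansion yields (1, 1, C)
# and broadcasts against (N, H, W, C). Below is a NumPy-only sketch of the same shape
# logic, with illustrative sizes that are not taken from the snippet above:
import numpy as np

conv_out = np.zeros((1, 16, 8, 8), dtype="float32")  # stand-in for an NCHW conv output
bias = np.arange(16, dtype="float32")                 # per-channel bias, shape (16,)
bias_nchw = bias.reshape(16, 1, 1)                    # same shape effect as expand_dims(axis=1, num_newaxis=2)
assert (conv_out + bias_nchw).shape == (1, 16, 8, 8)  # broadcasts over H and W
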
def verify_broadcast_binary_ele(lhs_shape, rhs_shape, typ="add"):
    # Build the logic and compile the function
    A = tvm.placeholder(shape=lhs_shape, name="A")
    B = tvm.placeholder(shape=rhs_shape, name="B")
    if typ == "add":
        C = topi.broadcast_add(A, B)
    elif typ == "sub":
        C = topi.broadcast_sub(A, B)
    elif typ == "div":
        C = topi.broadcast_div(A, B)
    elif typ == "mul":
        C = topi.broadcast_mul(A, B)
    elif typ == "maximum":
        C = topi.broadcast_maximum(A, B)
    elif typ == "minimum":
        C = topi.broadcast_minimum(A, B)
    else:
        raise NotImplementedError

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(C)
        foo = tvm.build(s, [A, B, C], device, name="broadcast_binary" + "_" + typ)
        lhs_npy = np.random.uniform(size=lhs_shape).astype(A.dtype)
        rhs_npy = np.random.uniform(size=rhs_shape).astype(A.dtype)
        if typ == "add":
            out_npy = lhs_npy + rhs_npy
        elif typ == "sub":
            out_npy = lhs_npy - rhs_npy
        elif typ == "div":
            rhs_npy = np.abs(rhs_npy) + 0.001
            out_npy = lhs_npy / rhs_npy
        elif typ == "mul":
            out_npy = lhs_npy * rhs_npy
        elif typ == "maximum":
            out_npy = np.maximum(lhs_npy, rhs_npy)
        elif typ == "minimum":
            out_npy = np.minimum(lhs_npy, rhs_npy)
        else:
            raise NotImplementedError
        lhs_nd = tvm.nd.array(lhs_npy, ctx)
        rhs_nd = tvm.nd.array(rhs_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), ctx)
        for _ in range(1):
            foo(lhs_nd, rhs_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)

    check_device("vulkan")
    check_device("opencl")
    check_device("cuda")
    check_device("metal")
    check_device("rocm")

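# A minimal, hypothetical driver for the helper above (the name test_broadcast_binary and
# the shapes are illustrative, not taken from the original test file); each pair of shapes
# is chosen so the two sides are broadcast-compatible in the NumPy sense:
def test_broadcast_binary():
    verify_broadcast_binary_ele((5, 2, 3), (2, 1), typ="add")
    verify_broadcast_binary_ele((5, 2, 3), (1,), typ="sub")
    verify_broadcast_binary_ele((5, 64, 128), (2, 5, 64, 128), typ="mul")
    verify_broadcast_binary_ele((2, 3, 1, 32), (64, 32), typ="div")
    verify_broadcast_binary_ele((1, 32), (64, 32), typ="maximum")
    verify_broadcast_binary_ele((2, 3, 1, 32), (64, 32), typ="minimum")
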
def test_broadcast_binary_op(lhs_shape, rhs_shape, typ="add"):
    global TASK
    TASK = "bcast_binary_" + typ + "_lhs" +\
        "_".join([str(ele) for ele in lhs_shape]) +\
        "rhs" + "_".join([str(ele) for ele in rhs_shape])
    A = tvm.te.placeholder(shape=lhs_shape, name="A")
    B = tvm.te.placeholder(shape=rhs_shape, name="B")
    if typ == "add":
        C = topi.broadcast_add(A, B)
    elif typ == "sub":
        C = topi.broadcast_sub(A, B)
    elif typ == "div":
        C = topi.broadcast_div(A, B)
    elif typ == "mul":
        C = topi.broadcast_mul(A, B)
    elif typ == "maximum":
        C = topi.broadcast_maximum(A, B)
    elif typ == "minimum":
        C = topi.broadcast_minimum(A, B)
    else:
        raise NotImplementedError
    s = topi.cuda.schedule_broadcast(C)
    fcuda = tvm.build(s, [A, B, C], "cuda", name="broadcast_binary" + "_" + typ)

    lhs_npy = np.random.uniform(size=lhs_shape).astype(A.dtype)
    rhs_npy = np.random.uniform(size=rhs_shape).astype(A.dtype)
    if typ == "add":
        out_npy = lhs_npy + rhs_npy
    elif typ == "sub":
        out_npy = lhs_npy - rhs_npy
    elif typ == "div":
        rhs_npy = np.abs(rhs_npy) + 0.001
        out_npy = lhs_npy / rhs_npy
    elif typ == "mul":
        out_npy = lhs_npy * rhs_npy
    elif typ == "maximum":
        out_npy = np.maximum(lhs_npy, rhs_npy)
    elif typ == "minimum":
        out_npy = np.minimum(lhs_npy, rhs_npy)

    lhs_nd = tvm.nd.array(lhs_npy, tvm.gpu())
    rhs_nd = tvm.nd.array(rhs_npy, tvm.gpu())
    out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), tvm.gpu())
    for _ in range(2):
        fcuda(lhs_nd, rhs_nd, out_nd)
    tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

def test_broadcast_binary_op(lhs_shape, rhs_shape, typ="add"):
    global TASK
    TASK = "bcast_binary_" + typ + "_lhs" +\
        "_".join([str(ele) for ele in lhs_shape]) +\
        "rhs" + "_".join([str(ele) for ele in rhs_shape])
    A = tvm.placeholder(shape=lhs_shape, name="A")
    B = tvm.placeholder(shape=rhs_shape, name="B")
    if typ == "add":
        C = topi.broadcast_add(A, B)
    elif typ == "sub":
        C = topi.broadcast_sub(A, B)
    elif typ == "div":
        C = topi.broadcast_div(A, B)
    elif typ == "mul":
        C = topi.broadcast_mul(A, B)
    elif typ == "maximum":
        C = topi.broadcast_maximum(A, B)
    elif typ == "minimum":
        C = topi.broadcast_minimum(A, B)
    else:
        raise NotImplementedError
    s = topi.cuda.schedule_broadcast(C)
    fcuda = tvm.build(s, [A, B, C], "cuda", name="broadcast_binary" + "_" + typ)

    lhs_npy = np.random.uniform(size=lhs_shape).astype(A.dtype)
    rhs_npy = np.random.uniform(size=rhs_shape).astype(A.dtype)
    if typ == "add":
        out_npy = lhs_npy + rhs_npy
    elif typ == "sub":
        out_npy = lhs_npy - rhs_npy
    elif typ == "div":
        rhs_npy = np.abs(rhs_npy) + 0.001
        out_npy = lhs_npy / rhs_npy
    elif typ == "mul":
        out_npy = lhs_npy * rhs_npy
    elif typ == "maximum":
        out_npy = np.maximum(lhs_npy, rhs_npy)
    elif typ == "minimum":
        out_npy = np.minimum(lhs_npy, rhs_npy)

    lhs_nd = tvm.nd.array(lhs_npy, tvm.gpu())
    rhs_nd = tvm.nd.array(rhs_npy, tvm.gpu())
    out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), tvm.gpu())
    for _ in range(2):
        fcuda(lhs_nd, rhs_nd, out_nd)
    np.testing.assert_allclose(out_nd.asnumpy(), out_npy)

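# The two variants above assume a module-level TASK name (set via `global TASK`, presumably
# used elsewhere in the original script to label generated kernels) and a CUDA-capable GPU.
# A minimal, hypothetical way to invoke them, with illustrative broadcastable shapes:
TASK = ""

if __name__ == "__main__":
    test_broadcast_binary_op((5, 2, 3), (2, 1), typ="add")
    test_broadcast_binary_op((5, 64, 128), (2, 5, 64, 128), typ="mul")
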
def compute_contrib_conv2d_NCHWc(attrs, inputs, _):
    """Compute definition of conv2d NCHWc"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    kh, kw = attrs.get_int_tuple('kernel_size')
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    assert dilation == (1, 1), "not support dilate now"
    if groups == 1:
        out = topi.nn.conv2d_NCHWc(inputs[0], inputs[1], channels,
                                   (kh, kw), strides, padding)
    else:
        raise ValueError("not support arbitrary group number > 1 for now")
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    return out

def mxnet_check():
    """This is a simple test function for MXNet bridge

    It is not included as nosetests, because of its dependency on mxnet

    User can directly run this script to verify correctness.
    """
    import mxnet as mx
    import topi
    import tvm
    import numpy as np
    from tvm.contrib.mxnet import to_mxnet_func

    # build a TVM function through topi
    n = 20
    shape = (20,)
    scale = tvm.var("scale", dtype="float32")
    x = tvm.placeholder(shape)
    y = tvm.placeholder(shape)
    z = topi.broadcast_add(x, y)
    zz = tvm.compute(shape, lambda *i: z(*i) * scale)
    target = tvm.target.cuda()

    # build the function
    with target:
        s = topi.generic.schedule_injective(zz)
        f = tvm.build(s, [x, y, zz, scale])

    # get a mxnet version
    mxf = to_mxnet_func(f, const_loc=[0, 1])
    ctx = mx.gpu(0)
    xx = mx.nd.uniform(shape=shape, ctx=ctx)
    yy = mx.nd.uniform(shape=shape, ctx=ctx)
    zz = mx.nd.empty(shape=shape, ctx=ctx)
    # invoke myf: this runs in mxnet engine
    mxf(xx, yy, zz, 10.0)
    mxf(xx, yy, zz, 10.0)
    tvm.testing.assert_allclose(
        zz.asnumpy(), (xx.asnumpy() + yy.asnumpy()) * 10)

def mxnet_check():
    """This is a simple test function for MXNet bridge

    It is not included as pytests, because of its dependency on mxnet

    User can directly run this script to verify correctness.
    """
    import mxnet as mx
    import topi
    import tvm
    from tvm import te
    import numpy as np
    from tvm.contrib.mxnet import to_mxnet_func

    # build a TVM function through topi
    n = 20
    shape = (20, )
    scale = te.var("scale", dtype="float32")
    x = te.placeholder(shape)
    y = te.placeholder(shape)
    z = topi.broadcast_add(x, y)
    zz = te.compute(shape, lambda *i: z(*i) * scale)
    target = tvm.target.cuda()

    # build the function
    with target:
        s = topi.generic.schedule_injective(zz)
        f = tvm.build(s, [x, y, zz, scale])

    # get a mxnet version
    mxf = to_mxnet_func(f, const_loc=[0, 1])
    ctx = mx.gpu(0)
    xx = mx.nd.uniform(shape=shape, ctx=ctx)
    yy = mx.nd.uniform(shape=shape, ctx=ctx)
    zz = mx.nd.empty(shape=shape, ctx=ctx)
    # invoke myf: this runs in mxnet engine
    mxf(xx, yy, zz, 10.0)
    mxf(xx, yy, zz, 10.0)
    tvm.testing.assert_allclose(zz.asnumpy(), (xx.asnumpy() + yy.asnumpy()) * 10)

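# The docstring above notes that the check is meant to be run directly rather than collected
# by pytest/nose; a minimal entry point for that (an assumption, not part of the snippet) is:
if __name__ == "__main__":
    mxnet_check()
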
def compute_conv2d_transpose(attrs, inputs, _):
    """Compute definition of conv2d_transpose"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    layout = attrs["layout"]
    assert layout == "NCHW", "only support nchw for now"
    assert dilation == (1, 1), "not support dilate now"
    assert groups == 1, "only support groups == 1 for now"
    out = topi.nn.conv2d_transpose_nchw(inputs[0], inputs[1], strides, padding)
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    output_padding = attrs.get_int_tuple("output_padding")
    out = topi.nn.pad(out,
                      [0, 0, 0, 0],
                      [0, 0, output_padding[0], output_padding[1]])
    return out

def compute_conv2d(attrs, inputs, _):
    """Compute definition of conv2d"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int("groups")
    channels = attrs.get_int("channels")
    layout = attrs["layout"]
    assert layout == "NCHW", "only support nchw for now"
    assert dilation == (1, 1), "not support dilate now"
    if groups == 1:
        out = topi.nn.conv2d(inputs[0], inputs[1], strides, padding)
    elif groups == get_const_int(inputs[0].shape[1]) and groups == channels:
        out = topi.nn.depthwise_conv2d_nchw(inputs[0], inputs[1], strides, padding)
    else:
        raise ValueError("not support arbitrary group number for now")
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.broadcast_add(out, bias)
    return out
