Example #1
    def _meshgrid(height, width):

        y_zero = tvm.compute((height, ),
                             lambda i: -1 + i * 2.0 / (height - 1),
                             name='y0')
        x_zero = tvm.compute((width, ),
                             lambda i: -1 + i * 2.0 / (width - 1),
                             name='x0')

        y_temp = tvm.compute((height * width, ),
                             lambda i: y_zero[i // width],
                             name='y')
        x_temp = tvm.compute((height * width, ),
                             lambda i: x_zero[i % width],
                             name='x')

        y_temp = topi.reshape(y_temp, (1, height * width))
        x_temp = topi.reshape(x_temp, (1, height * width))
        ones = tvm.compute((1, height * width), lambda i, j: 1, name='ones')

        grid = tvm.compute(
            (3, height * width),
            lambda i, j: 0.5 * (i - 1) * (i - 2) * x_temp[0, j] + i *
            (2 - i) * y_temp[0, j] + 0.5 * i * (i - 1) * ones[0, j],
            name='grid')
        # grid = topi.concatenate((x_temp, y_temp, ones), 0)  # topi.concatenate cannot be used here
        return grid
    def _transform(theta, input_dim, out_size, input_shape, dtype):
        
        num_batch = input_shape[0]
        height = input_shape[1]
        width = input_shape[2]
        num_channels = input_shape[3]

        theta = topi.reshape(theta, (num_batch, 2, 3))
        theta = topi.cast(theta, dtype)

        out_height = out_size[0]
        out_width = out_size[1]
                
        grid = _meshgrid(out_height, out_width)       
        grid = topi.reshape(grid, (num_batch, 3, out_height*out_width))
        grid = topi.cast(grid, dtype=dtype)
        
        k = tvm.reduce_axis((0, 3), 'k')
        T_g = tvm.compute(
            (num_batch, 2, out_height * out_width),
            lambda b, y, x: tvm.sum(theta[b, y, k] * grid[b, k, x], axis=k),
            name='T_g')

        x_s = tvm.compute((num_batch, 1, out_height * out_width),
                          lambda i, j, k: T_g[i, 0, k],
                          name='x_s')
        y_s = tvm.compute((num_batch, 1, out_height * out_width),
                          lambda i, j, k: T_g[i, 1, k],
                          name='y_s')

        x_s_flat = topi.reshape(x_s, (num_batch * out_height * out_width,))
        y_s_flat = topi.reshape(y_s, (num_batch * out_height * out_width,))

        input_transformed = _interpolate(input_dim, input_shape, x_s_flat,
                                         y_s_flat, out_size, dtype)
        output = topi.reshape(input_transformed,
                              [num_batch, out_height, out_width, num_channels])
        return output
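The grid compute in `_meshgrid` avoids `topi.concatenate` by selecting the x row, the y row or the ones row with a degree-2 polynomial in the row index `i`. A quick NumPy check (illustrative only, not part of the original code) shows the selector is equivalent to stacking the three rows:

import numpy as np

hw = 6
x = np.random.rand(hw)
y = np.random.rand(hw)
ones = np.ones(hw)

grid = np.empty((3, hw))
for i in range(3):
    # same coefficients as in _meshgrid: row 0 -> x, row 1 -> y, row 2 -> ones
    grid[i] = 0.5 * (i - 1) * (i - 2) * x + i * (2 - i) * y + 0.5 * i * (i - 1) * ones

assert np.allclose(grid, np.stack([x, y, ones]))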
Example #3
    def forward_(x_op, name):
        # this is 2d only specialized implementation of topi.nn.softmax
        if x_op.ndim == 1:
            x = topi.reshape(x_op.tvm_tensor, (1, x_op.size))
            m = 1
            n = x_op.shape[0]
        elif x_op.ndim == 2:
            x = x_op.tvm_tensor
            m, n = x_op.shape
        else:
            raise ValueError(f'Given ndim {x_op.ndim} is not supported')

        k = tvm.reduce_axis((0, n), name='k')
        max_elem = tvm.compute((m, ),
                               lambda i: tvm.max(x[i, k], axis=k),
                               name=f'{name}:max_elem')

        k = tvm.reduce_axis((0, n), name='k')
        expsum = tvm.compute(
            (m, ),
            lambda i: tvm.sum(tvm.exp(x[i, k] - max_elem[i]), axis=k),
            name=f'{name}:expsum')

        softmax = tvm.compute(
            x.shape,
            lambda i, j: tvm.exp(x[i, j] - max_elem[i]) / expsum[i],
            name=f'{name}:softmax')

        if x_op.ndim == 1:
            softmax = topi.reshape(softmax, x_op.shape)

        return softmax, max_elem, expsum
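A minimal driving sketch for the 2-D path (not from the original project). It assumes the same pre-0.7 `tvm`/`topi` API as above, that `forward_` is reachable as `Softmax.forward_` (as it is called in Example #8), and uses a hypothetical `XOp` namedtuple that only mimics the `tvm_tensor`/`ndim`/`shape`/`size` attributes the function reads:

import collections

import numpy as np
import tvm

XOp = collections.namedtuple('XOp', ['tvm_tensor', 'ndim', 'shape', 'size'])

m, n = 4, 8
x = tvm.placeholder((m, n), name='x')
softmax, max_elem, expsum = Softmax.forward_(XOp(x, 2, (m, n), m * n), name='demo')

s = tvm.create_schedule(softmax.op)
f = tvm.build(s, [x, softmax], 'llvm')

data = np.random.randn(m, n).astype('float32')
out = tvm.nd.empty((m, n))
f(tvm.nd.array(data), out)

# compare against a NumPy softmax using the same max-subtraction trick
ref = np.exp(data - data.max(axis=1, keepdims=True))
ref /= ref.sum(axis=1, keepdims=True)
np.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5)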
Example #4
def reshape(tensor, shape, sph=None, dst_scope='buffer0'):
    res = topi.reshape(tensor, shape)

    MarkScope(res, dst_scope)
    PragmaCopy(res)

    return res
Example #5
def verify_reshape(src_shape, dst_shape):
    A = tvm.placeholder(shape=src_shape, name="A")
    B = topi.reshape(A, dst_shape)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        foo = tvm.build(s, [A, B], device, name="reshape")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npy = np.reshape(data_npy, newshape=dst_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in [
            "llvm", "nvptx", "cuda", "opencl", "metal", "rocm", "vulkan",
            "sdaccel"
    ]:
        check_device(device)
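Typical way to drive this helper from a test entry point; the shapes below are illustrative, not taken from the original test file:

def test_reshape():
    verify_reshape((1, 2, 3, 4), (2, 12))
    verify_reshape((4, 2, 3, 4), (2, 4, 12))
    verify_reshape((4, 2, 3, 4), (2, 48))


if __name__ == "__main__":
    test_reshape()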
Example #6
def FullConnection(device="llvm",
                   lib_path="./",
                   ndim_a=None,
                   dtype=None,
                   has_bias=None):
    '''
    full connection
    Args:
        device:
        lib_path:
        ndim_a:
        dtype:
        has_bias:

    Returns:
    '''
    n_dim, ci, h_dim, kernel_tensor = (tvm.var("n_dim"), tvm.var("out_tensor"), tvm.var("h_dim"), \
                                       tvm.var("kernel_tensor"))
    co = tvm.var("co")
    if ndim_a == 4:
        shape_a = (n_dim, ci, h_dim, kernel_tensor)
        chw = ci * h_dim * kernel_tensor
    else:
        shape_a = (n_dim, ci)
        chw = ci
    shape_w = (co, chw)
    opname = "FullConnection_ndimA%d_%s_%s" % (ndim_a, dtype, "hasBias"
                                               if has_bias else "notHasBias")
    is_var = True
    vh, vw, vc = 1, 1, 1
    print(opname)

    in_tensor = tvm.placeholder(shape_a, dtype=dtype, name='in_tensor')
    kernel_tensor = tvm.placeholder(shape_w, dtype=dtype, name='kernel_tensor')
    input_tensor = topi.reshape(in_tensor,
                                (n_dim,
                                 chw)) if len(shape_a) == 4 else in_tensor

    out_tensor = _matmul_spatial_pack_asm((is_var, 0, ci, vh, vw, vc), input_tensor, kernel_tensor, \
                                          layout='NC', out_dtype=dtype)
    if has_bias:
        bias = tvm.placeholder((co, ), dtype=dtype, name='bias')
        out_tensor = tvm.compute((n_dim, co),
                                 lambda n, co: out_tensor[n, co] + bias[co],
                                 tag='injective')

    tensor_list = [in_tensor, kernel_tensor, bias, out_tensor
                   ] if has_bias else [in_tensor, kernel_tensor, out_tensor]
    cfg = {
        'is_var': is_var,
        'is_transpose': 0,
        'core_id': 0,
        'CI': ci,
        'VH': vh,
        'VW': vw,
        'VC': vc
    }
    s = _matmul_schedule_asm(cfg, [out_tensor])
    Genlib(s, tensor_list, device, opname, lib_path)
Example #7
 def __call__(self, inputs):
     outputs = topi.nn.conv2d(inputs, self.weight, self.stride,
                              self.padding, self.dilation)
     if self.bias:  # TODO: check bias shape
         reshaped_bias = topi.reshape(
             self.bias, (self.in_channels, self.out_channels, 1, 1))
         outputs += reshaped_bias
     return outputs
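For reference, a conv2d bias is normally a 1-D tensor of length `out_channels`, and the usual NCHW pattern reshapes it so it broadcasts over batch, height and width. The variant below is only a sketch of that common pattern (it assumes `self.bias` has shape `(self.out_channels,)`), not necessarily the fix intended by the TODO above:

# hypothetical variant, assuming self.bias has shape (self.out_channels,)
reshaped_bias = topi.reshape(self.bias, (1, self.out_channels, 1, 1))
outputs = topi.add(outputs, reshaped_bias)  # broadcast add over N, H, W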
Example #8
    def algorithm_forward(self):
        assert self.x.size == self.t.size, \
            "only supports one-hot vector"

        self.softmax, self.max_elem, self.expsum = Softmax.forward_(
            self.x, name=f'{self.label}')

        if self.x.ndim == 1:
            y = topi.reshape(self.softmax, (1, self.x.size))
            t = topi.reshape(self.t.tvm_tensor, (1, self.t.size))
            m = 1
            n = self.x.shape[0]
        elif self.x.ndim == 2:
            y = self.softmax
            t = self.t.tvm_tensor
            m, n = self.x.shape
        else:
            raise NotImplementedError

        self.ty = tvm.compute((m, n),
                              lambda i, j: tvm.log(y[i, j]) * t[i, j],
                              name=f'{self.label}:ty')

        k = tvm.reduce_axis((0, n), name='k')
        self.sum_ty = tvm.compute((m, ),
                                  lambda i: tvm.sum(self.ty[i, k], axis=k),
                                  name=f'{self.label}:sum_ty')

        # TODO: need to validate the shape and keepdims
        # self.shape would be like (1,1,1), which size is 1
        expected_size = 1
        assert self.size == expected_size, \
            f'size of SoftmaxWithCrossEntropyLoss must be {expected_size}, not {self.size}'

        k = tvm.reduce_axis((0, m), name='k')
        self.total = tvm.compute(self.shape,
                                 lambda *idxs: tvm.sum(self.sum_ty[k], axis=k),
                                 name=f'{self.label}:total')

        self.tvm_tensor = tvm.compute(self.shape,
                                      lambda *idxs: -self.total[idxs] / m,
                                      name=f'{self.label}:tensor')
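As a cross-check, the computes above amount to loss = -sum(t * log(softmax(x))) / m. A NumPy reference (a sketch, assuming 2-D `x` and one-hot `t`):

import numpy as np

def cross_entropy_ref(x, t):
    e = np.exp(x - x.max(axis=1, keepdims=True))
    y = e / e.sum(axis=1, keepdims=True)        # softmax, as in Softmax.forward_
    return -(t * np.log(y)).sum() / x.shape[0]  # averaged over the batch dimension m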
Example #9
def _declaration_conv_NCHWc_int8(wkl, sch, data, kernel):
    """ Declaration for int8 conv"""
    out_dtype = wkl.out_dtype
    HPAD, WPAD = wkl.hpad, wkl.wpad
    HSTR, WSTR = wkl.hstride, wkl.wstride

    batch_size = data.shape[0]
    out_height = (wkl.height + 2 * HPAD - wkl.hkernel) // HSTR + 1
    out_width = (wkl.width + 2 * WPAD - wkl.wkernel) // WSTR + 1

    DOPAD = (HPAD != 0 or WPAD != 0)
    if DOPAD:
        data_pad = pad(data, (0, 0, HPAD, WPAD, 0), name="data_pad")
    else:
        data_pad = data

    oshape = (batch_size, wkl.out_filter // sch.oc_bn, out_height, out_width,
              sch.oc_bn)

    # Intel performs dot product of 2 "4" Int8 values
    n_elems = 4
    assert sch.ic_bn % n_elems == 0
    ic_outer = tvm.reduce_axis((0, wkl.in_filter // (sch.ic_bn)),
                               name='ic_outer')
    ic_f_inner = tvm.reduce_axis((0, sch.ic_bn // n_elems), name='ic_f_inner')
    ic_s_inner = tvm.reduce_axis((0, n_elems), name='ic_s_inner')

    # Reshaping kernel as the last 2 dimensions are 1x1 (k_h x k_w)
    k_shape = kernel.shape
    kernel = topi.reshape(kernel,
                          (k_shape[0], k_shape[1], k_shape[2], k_shape[3],
                           k_shape[4] * k_shape[5] * k_shape[6]))

    conv = tvm.compute(
        oshape,
        lambda n, oc_chunk, oh, ow, oc_block: tvm.sum(
            data_pad[n, ic_outer, oh * HSTR, ow * WSTR, ic_f_inner * n_elems +
                     ic_s_inner].astype(out_dtype) * kernel[
                         oc_chunk, ic_outer, ic_f_inner, oc_block, ic_s_inner].
            astype(out_dtype),
            axis=[ic_outer, ic_f_inner, ic_s_inner]),
        name='conv2d_NCHWc_int8',
        tag="conv2d_NCHWc_int8")

    return conv
Example #10
def verify_reshape(src_shape, dst_shape):
    A = tvm.placeholder(shape=src_shape, name="A")
    B = topi.reshape(A, dst_shape)
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        foo = tvm.build(s, [A, B], device, name="reshape")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npy = np.reshape(data_npy, newshape=dst_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)
Example #11
def verify_reshape(src_shape, dst_shape):
    A = te.placeholder(shape=src_shape, name="A")
    B = topi.reshape(A, dst_shape)
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.testing.get_injective_schedule(device)(B)
        foo = tvm.build(s, [A, B], device, name="reshape")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npy = np.reshape(data_npy, newshape=dst_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)
Example #12
def verify_reshape(src_shape, dst_shape):
    A = tvm.placeholder(shape=src_shape, name="A")
    B = topi.reshape(A, dst_shape)
    s = topi.cuda.schedule_injective(B)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
        foo = tvm.build(s, [A, B], device, name="reshape")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npy = np.reshape(data_npy, newshape=dst_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    check_device("cuda")
    check_device("opencl")
    check_device("metal")
Example #13
inits = [(np.zeros, 'shape'), (np.zeros, 'shape'), (np.zeros, 'shape'),
         (np.zeros, 'shape'), (np.zeros, 'shape'), (np.ones, 'shape'),
         (np.zeros, 'shape'), (np.zeros, 'shape'), (np.random.normal, 'size'),
         (np.random.normal, 'size')]

# Graph input
x = tvm.placeholder((batch_size, num_timesteps * num_input), 'float32')
y = tvm.placeholder((batch_size, num_classes), 'float32')
s = tvm.placeholder((batch_size, num_hidden), 'float32')
h = tvm.placeholder((batch_size, num_hidden), 'float32')

# Tensors and vars for training graph
weights = [tvm.placeholder(x, 'float32') for x in sizes]

#Construct model
xs = topi.split(topi.reshape(x, (batch_size, num_timesteps, num_input)),
                num_timesteps,
                axis=1)
xs = [topi.reshape(x, (batch_size, num_input)) for x in xs]
new_s = s
new_h = h
for i in range(num_timesteps):
    inp = topi.concatenate([xs[i], new_h], 1)
    g = topi.tanh(topi.matmul(inp, weights[0]) + weights[1])
    j = topi.sigmoid(topi.matmul(inp, weights[2]) + weights[3])
    f = topi.sigmoid(topi.matmul(inp, weights[4]) + weights[5])
    o = topi.sigmoid(topi.matmul(inp, weights[6]) + weights[7])

    new_s = new_s * f + g * j
    new_h = topi.tanh(new_s) * o
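The output projection that typically follows this loop is shown in Example #23; it assumes `weights[8]` and `weights[9]` have shapes `(num_hidden, num_classes)` and `(num_classes,)`:

# final projection, as in Example #23
logits = topi.matmul(new_h, weights[8]) + weights[9]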
Example #14
def compute_reshape_like(attrs, inputs, out_info):
    """Compute definition of reshape_like"""
    return topi.reshape(inputs[0], inputs[1].shape)
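A minimal illustration of the `reshape_like` semantics (hypothetical placeholders, same pre-0.7 API): the output takes its shape from the second input and its data from the first.

import tvm
import topi

data = tvm.placeholder((2, 3, 4), name="data")
shape_like = tvm.placeholder((6, 4), name="shape_like")
out = topi.reshape(data, shape_like.shape)  # out has shape (6, 4)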
Example #15
def ConvVar(device="llvm", lib_path="./", optype=None,\
            ndim=None, layout=None, dtype=None, kernels=None,\
            strides=None, pad=None, dilations=None,\
            hasbias=None, activation_type=None,\
            config_entity=None, impl_dtype=None, channel_multiplier=None,\
            use_arm32=False, cfg=None):
    '''
    convolution
    Args:
        device:
        lib_path:
        optype:
        ndim:
        layout:
        dtype:
        kernels:
        strides:
        pad:
        dilations:
        hasbias:
        activation_type:
        config_entity:
        impl_dtype:
        channel_multiplier:
        use_arm32:
        cfg:

    Returns:
    '''
    use_depthwise = optype == 'ConvolutionDepthwise'
    use_deconv = optype == 'Deconvolution'
    use_deconv_depthwise = optype == 'DeConvolutionDepthwise'
    has_bias = hasbias

    ow = 1 if cfg is None else cfg['VW']
    oh = 1 if cfg is None else cfg['VH']
    oc = 1 if cfg is None else cfg['VC']
    kh, kw = kernels
    op_name = "%s_ndim%d_%s_k%d_s%d_p%d%d%d%d_d%d_act%d_vc%d_vh%d_vw%d_hasbias%d" % ( \
              map_conv[optype], ndim, dtype, \
              kh, strides[0], pad[0], pad[1], pad[2], pad[3], dilations[0], \
              activation_enum_map[activation_type], oc, oh, ow, hasbias)
    batch = tvm.var("batch")
    in_channel = tvm.var("in_channel")
    in_height, in_width = tvm.var("in_height"), tvm.var("in_width")
    pad_up, pad_down, pad_left, pad_right = pad
    opname = op_name

    print("Conv", opname, config_entity)

    if impl_dtype is None:
        impl_dtype = dtype

    if use_depthwise:
        multiplier = channel_multiplier
        out_channel = in_channel * multiplier
    elif use_deconv_depthwise:
        multiplier = channel_multiplier
        out_channel = in_channel * multiplier
    else:
        out_channel = tvm.var("out_channel")

    # define placeholder
    input_tensor = in_tensor = tvm.placeholder(
        (batch, in_channel, in_height, in_width),
        dtype=dtype,
        name='in_tensor')

    if use_depthwise:
        temp_tensor = kernel_tensor = tvm.placeholder((in_channel, multiplier, kh, kw), dtype=dtype,\
                                                      name='kernel_tensor')
    elif use_deconv:
        temp_tensor = kernel_tensor = tvm.placeholder((in_channel, out_channel, kh, kw), dtype=dtype,\
                                                      name='kernel_tensor')
    elif use_deconv_depthwise:
        temp_tensor = kernel_tensor = tvm.placeholder((in_channel, multiplier, kh, kw), dtype=dtype,\
                                                      name='kernel_tensor')
    else:
        temp_tensor = kernel_tensor = tvm.placeholder((out_channel, in_channel, kh, kw), dtype=dtype,\
                                                      name='kernel_tensor')
    if has_bias:
        bias = tvm.placeholder((out_channel, ), dtype=dtype, name='bias')
        bias1 = topi.reshape(bias, (out_channel, 1, 1))

    if impl_dtype != dtype:
        input_tensor = AsType(input_tensor, impl_dtype)
        temp_tensor = AsType(temp_tensor, impl_dtype)
        if has_bias:
            bias1 = AsType(bias1, impl_dtype)

    # define compute & schedule
    if pad_up != pad_down or pad_left != pad_right:
        input_tensor = topi.nn.pad(input_tensor, [0, 0, pad_up, pad_left],
                                   [0, 0, pad_down, pad_right],
                                   name='data_pad')
        padding = 0, 0
    else:
        padding = pad_up, pad_left
    if use_depthwise:
        cfg1 = (True, 1, 1,
                1) if cfg is None else (True, cfg["tile_oh"], cfg["tile_ow"],
                                        cfg["tile_co"])
        out_tensor = _depthwise_spatial_pack(cfg1, input_tensor, temp_tensor, strides, padding, dilations,\
                                             out_dtype=impl_dtype)
    elif use_deconv:

        def GetInput(input_tensor, temp_tensor, padding):
            _, out_c, filter_h, filter_w = temp_tensor.shape
            if out_c is None:
                print("temp_tensor.shape err")
            stride_h, stride_w = strides
            # dilate stage
            dilated_input = topi.nn.dilate(input_tensor,
                                           [1, 1, stride_h, stride_w],
                                           name='DilatedInput')
            # padding stage
            fpad_top, fpad_left, fpad_bottom, fpad_right = topi.nn.get_pad_tuple(
                padding, (filter_h, filter_w))
            bpad_top = filter_h - 1 - fpad_top
            bpad_bottom = filter_h - 1 - fpad_bottom
            bpad_left = filter_w - 1 - fpad_left
            bpad_right = filter_w - 1 - fpad_right
            padded_input = topi.nn.pad(dilated_input, \
                                      [0, 0, bpad_top, bpad_left], \
                                      [0, 0, bpad_bottom, bpad_right], \
                                      name='PaddedInput')
            return padded_input

        special_deconv = kh == 2 and kw == 2 and strides[0] == 2 and strides[
            1] == 2
        # special_deconv = False
        if special_deconv:
            out_tensor = OptimalOut(input_tensor, temp_tensor, in_channel)
        else:
            out_tensor = BaseImplementation(input_tensor, temp_tensor,
                                            GetInput, layout, padding)
    elif use_deconv_depthwise:

        def GetInput(input_tensor, temp_tensor, padding):
            _, out_c, filter_h, filter_w = temp_tensor.shape
            if out_c is None:
                print("temp_tensor.shape err")
            stride_h, stride_w = strides
            # dilate stage
            dilated_input = topi.nn.dilate(input_tensor,
                                           [1, 1, stride_h, stride_w],
                                           name='DilatedInput')
            # padding stage
            fpad_top, fpad_left, fpad_bottom, fpad_right = topi.nn.get_pad_tuple(
                padding, (filter_h, filter_w))
            bpad_top = filter_h - 1 - fpad_top
            bpad_bottom = filter_h - 1 - fpad_bottom
            bpad_left = filter_w - 1 - fpad_left
            bpad_right = filter_w - 1 - fpad_right
            padded_input = topi.nn.pad(dilated_input, \
                                      [0, 0, bpad_top, bpad_left], \
                                      [0, 0, bpad_bottom, bpad_right], \
                                      name='PaddedInput')
            return padded_input

        temp_tensor = topi.flip(temp_tensor, axis=-1)
        temp_tensor = topi.flip(temp_tensor, axis=-2)
        out_tensor = topi.nn.depthwise_conv2d_nchw(GetInput(input_tensor, temp_tensor, padding), temp_tensor, (1, 1), \
                                                   padding, (1, 1), out_dtype=input_tensor.dtype)
    else:
        cfg1 = (True, 1, 1,
                1) if cfg is None else (True, cfg["tile_oh"], cfg["tile_ow"],
                                        cfg["tile_co"])
        out_tensor = _conv_spatial_pack_asm(cfg1, input_tensor, temp_tensor, strides, padding, dilations,\
                                            out_dtype=impl_dtype)

    if has_bias:
        out_tensor = tvm.compute(out_tensor.shape, lambda n, co, h, w: out_tensor[n, co, h, w] + bias1[co][0][0],\
                                 tag="injective")
    out_tensor = TopiActivation(out_tensor, activation_type)
    if impl_dtype != dtype:
        out_tensor = AsType(out_tensor, dtype)

    # create schedule
    if use_arm32:
        s = tvm.create_schedule(out_tensor.op)
    elif use_depthwise:
        s = schedule_depthwise_conv2d_nchw_arm(cfg, [out_tensor])
    elif use_deconv:
        if special_deconv:
            s = tvm.create_schedule([out_tensor.op])
        else:
            s = topi.generic.schedule_conv2d_nchw([out_tensor])
    elif use_deconv_depthwise:
        s = tvm.create_schedule([out_tensor.op])
    else:
        s = schedule_conv2d_nchw_arm_cpu([out_tensor])

    # generate lib
    attr = [
        batch, in_channel, in_height, in_width, out_channel, in_tensor,
        kernel_tensor
    ]
    tensor_list = [*attr, bias, out_tensor
                   ] if has_bias else [*attr, out_tensor]
    Genlib(s, tensor_list, device, opname, lib_path)
Example #16
def flatten_topi(inputs):
    N, C, H, W = inputs.shape
    return topi.reshape(inputs, [N, C * H * W])
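Illustrative use of the helper above (the placeholder and its shape are assumptions, using the same pre-0.7 `tvm.placeholder` API as the neighbouring examples):

import tvm
import topi

x = tvm.placeholder((8, 3, 32, 32), name="x")
y = flatten_topi(x)  # y has shape (8, 3 * 32 * 32) = (8, 3072)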
Example #17
packed_output_shape = (N, G, K // G, P, Q)
output_shape = (N, K, P, Q)

I = te.placeholder(input_shape, name="I")
W = te.placeholder(weight_shape, name="W")

### reductions
rc = te.reduce_axis((0, C // G), name='rc')
ry = te.reduce_axis((0, R), name='ry')
rx = te.reduce_axis((0, S), name='rx')

ig = C // G
og = K // G

### (K, C//G, R, S) to (G, K//G, C//G, R, S)
W_pack = topi.reshape(W, packed_weight_shape)

O = te.compute(
    packed_output_shape, lambda n, g, co, x, y: te.sum(I[n, rc + (
        g * ig), x + rx, y + ry] * W_pack[g, co, rc, rx, ry],
                                                       axis=[rc, ry, rx]))

s = te.create_schedule(O.op)
s[W_pack].compute_inline()

ir = tvm.lower(s, [I, W, O])
print(ir)

### COMPILE AND RUN
tgt_host = "llvm"
tgt = "llvm"
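A possible continuation (a sketch, not part of the original snippet): build for the LLVM target declared above and run once with random data. It assumes `input_shape`, `weight_shape` and `packed_output_shape` are concrete Python tuples of ints, since they are used both as placeholder shapes and as NumPy shapes here:

import numpy as np

func = tvm.build(s, [I, W, O], target=tgt, target_host=tgt_host, name="grouped_conv")
ctx = tvm.context(tgt, 0)

i_nd = tvm.nd.array(np.random.uniform(size=input_shape).astype(I.dtype), ctx)
w_nd = tvm.nd.array(np.random.uniform(size=weight_shape).astype(W.dtype), ctx)
o_nd = tvm.nd.empty(packed_output_shape, dtype=O.dtype, ctx=ctx)
func(i_nd, w_nd, o_nd)  # W_pack is inlined, so only I, W and O are bound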
Example #18
def Deconv(device="llvm",
           lib_path="./",
           optype=None,
           ndim=None,
           dtype=None,
           kernels=None,
           strides=None,
           pad=None,
           dilations=None,
           hasbias=None,
           activation_type=None,
           config_entity=None,
           impl_dtype=None,
           use_arm32=False,
           cfg=None):
    '''
    Deconvolution
    Args:
        device:
        lib_path:
        optype:
        ndim:
        dtype:
        kernels:
        strides:
        pad:
        dilations:
        hasbias:
        activation_type:
        config_entity:
        impl_dtype:
        use_arm32:
        cfg:

    Returns:
    '''
    if cfg is None:
        cfg = {
            'CI': tvm.var('ci'),
            'VH': 2,
            'VW': 2,
            'VC': 4,
            'VI': 4,
            'tile_oh': 2,
            'tile_ow': 2,
            'tile_co': 4,
            'ann_reduce': ['none', 'none'],
            "ann_spatial": ['none', 'none', 'none']
        }
    has_bias = hasbias
    batch = tvm.var("batch")
    in_channel = tvm.var("in_channel")
    in_height, in_width = tvm.var("in_height"), tvm.var("in_width")
    kh, kw = kernels
    ow = cfg['VW']
    oh = cfg['VH']
    oc = cfg['VC']
    op_name = "%s_ndim%d_%s_k%d_s%d_p%d%d%d%d_d%d_act%d_vc%d_vh%d_vw%d_hasbias%d" % (\
               map_conv[optype], ndim, dtype,\
               kh, strides[0], pad[0], pad[1], pad[2], pad[3], dilations[0],\
               activation_enum_map[activation_type], oc, oh, ow, hasbias)
    opname = op_name
    print("DEconv", opname, config_entity)

    if impl_dtype is None:
        impl_dtype = dtype

    out_channel = tvm.var("out_channel")

    # define placeholder
    input_tensor = in_tensor = tvm.placeholder((batch, in_channel, in_height, in_width, 4), \
                                               dtype=dtype, name='in_tensor')
    temp_tensor = kernel_tensor = tvm.placeholder((in_channel*4, out_channel, kh, kw), dtype=dtype, \
                                                  name='kernel_tensor')
    if has_bias:
        bias = tvm.placeholder((out_channel, ), dtype=dtype, name='bias')
        bias1 = topi.reshape(bias, (out_channel, 1, 1))

    if impl_dtype != dtype:
        input_tensor = AsType(input_tensor, impl_dtype)
        temp_tensor = AsType(temp_tensor, impl_dtype)
        if has_bias:
            bias1 = AsType(bias1, impl_dtype)

    # define compute & schedule
    cfg1 = (True, 1, 1, 1) if cfg is None else (True, cfg["tile_oh"],
                                                cfg["tile_ow"], cfg["tile_co"])
    out_tensor = _conv_spatial_pack_deconv(cfg1,
                                           input_tensor,
                                           temp_tensor,
                                           out_dtype=impl_dtype)

    if has_bias:
        out_tensor = tvm.compute(out_tensor.shape, lambda n, co, h, w, c4: \
            out_tensor[n, co, h, w, c4] + bias1[co*4 + c4][0][0], tag="injective")
    out_tensor = TopiActivation(out_tensor, activation_type)
    if impl_dtype != dtype:
        out_tensor = AsType(out_tensor, dtype)

    # create schedule
    if use_arm32:
        s = tvm.create_schedule(out_tensor.op)
    else:
        s = schedule_conv2d_nchw_arm_cpu_deconv(cfg, [out_tensor])

    attr = [
        batch, in_channel, in_height, in_width, out_channel, in_tensor,
        kernel_tensor
    ]
    if has_bias: attr.append(bias)
    attr.append(out_tensor)
    tensor_list = attr

    Genlib(s, tensor_list, device, opname, lib_path)
Example #19
    def _interpolate(im, im_shape, x, y, out_size, dtype):
        
        num_batch = im_shape[0]
        height = im_shape[1]
        width = im_shape[2]
        channels = im_shape[3]
            
        out_height = out_size[0]
        out_width = out_size[1]
        max_y = int(im_shape[1] - 1)
        max_x = int(im_shape[2] - 1)
               
        # [-1, 1] -> [0, width - 1]
        x = topi.multiply(topi.add(x, tvm.const(1, dtype=dtype)),
                          width / tvm.const(2, dtype=dtype))
        y = topi.multiply(topi.add(y, tvm.const(1, dtype=dtype)),
                          height / tvm.const(2, dtype=dtype))
            
        # do sampling
        dim3 = out_height * out_width * num_batch
            
        x0 = topi.cast(topi.floor(x), 'int32')
        y0 = topi.cast(topi.floor(y), 'int32')
        x1 = topi.add(x0, tvm.const(1, dtype="int32"))
        y1 = topi.add(y0, tvm.const(1, dtype="int32"))

        x0 = topi.clip(x0, 0, max_x)
        x1 = topi.clip(x1, 0, max_x)
        y0 = topi.clip(y0, 0, max_y)
        y1 = topi.clip(y1, 0, max_y)

        dim2 = width
        dim1 = width * height

        base = tvm.compute((dim3, ),
                           lambda i:
                           (i // (out_height * out_width)) * width * height,
                           name='base')
        base_y0 = topi.add(base, topi.multiply(y0, dim2))
        base_y1 = topi.add(base, topi.multiply(y1, dim2))

        idx_a = topi.add(base_y0, x0)
        idx_b = topi.add(base_y1, x0)
        idx_c = topi.add(base_y0, x1)
        idx_d = topi.add(base_y1, x1)
                
        im_flat = topi.reshape(im, (num_batch * height * width, channels))
        im_flat = topi.cast(im_flat, dtype)
        
        Ia = tvm.compute((dim3, channels),
                         lambda i, j: im_flat[idx_a[i], j], name='Ia')
        Ib = tvm.compute((dim3, channels),
                         lambda i, j: im_flat[idx_b[i], j], name='Ib')
        Ic = tvm.compute((dim3, channels),
                         lambda i, j: im_flat[idx_c[i], j], name='Ic')
        Id = tvm.compute((dim3, channels),
                         lambda i, j: im_flat[idx_d[i], j], name='Id')
            
        x0_f = topi.cast(x0, dtype)
        x1_f = topi.cast(x1, dtype)
        y0_f = topi.cast(y0, dtype)
        y1_f = topi.cast(y1, dtype)
        wa = topi.expand_dims(
            topi.multiply(topi.subtract(x1_f, x), topi.subtract(y1_f, y)), 1)
        wb = topi.expand_dims(
            topi.multiply(topi.subtract(x1_f, x), topi.subtract(y, y0_f)), 1)
        wc = topi.expand_dims(
            topi.multiply(topi.subtract(x, x0_f), topi.subtract(y1_f, y)), 1)
        wd = topi.expand_dims(
            topi.multiply(topi.subtract(x, x0_f), topi.subtract(y, y0_f)), 1)

        output = topi.add(
            topi.add(
                topi.add(topi.multiply(wa, Ia), topi.multiply(wb, Ib)),
                topi.multiply(wc, Ic)), topi.multiply(wd, Id))

        return output
Example #20
    def _interpolate(im, im_shape, x, y, out_size, dtype):

        num_batch = im_shape[0]
        height = im_shape[1]
        width = im_shape[2]
        channels = im_shape[3]

        out_height = out_size[0]
        out_width = out_size[1]
        max_y = int(im_shape[1] - 1)
        max_x = int(im_shape[2] - 1)

        # [-1,1] -> [0, width-1]
        x_temp = topi.multiply(topi.add(x, tvm.const(1, dtype=dtype)),
                               width / tvm.const(2, dtype=dtype))
        y_temp = topi.multiply(topi.add(y, tvm.const(1, dtype=dtype)),
                               height / tvm.const(2, dtype=dtype))

        # do sampling
        dim3 = out_height * out_width * num_batch

        x_zero = topi.cast(topi.floor(x_temp), 'int32')
        y_zero = topi.cast(topi.floor(y_temp), 'int32')
        x_one = topi.add(x_zero, tvm.const(1, dtype="int32"))
        y_one = topi.add(y_zero, tvm.const(1, dtype="int32"))

        x_zero = topi.clip(x_zero, 0, max_x)
        x_one = topi.clip(x_one, 0, max_x)
        y_zero = topi.clip(y_zero, 0, max_y)
        y_one = topi.clip(y_one, 0, max_y)

        dim2 = width

        base = tvm.compute((dim3, ),
                           lambda i:
                           (i // (out_height * out_width)) * width * height,
                           name='base')
        base_y0 = topi.add(base, topi.multiply(y_zero, dim2))
        base_y1 = topi.add(base, topi.multiply(y_one, dim2))

        idx_a = topi.add(base_y0, x_zero)
        idx_b = topi.add(base_y1, x_zero)
        idx_c = topi.add(base_y0, x_one)
        idx_d = topi.add(base_y1, x_one)

        im_flat = topi.reshape(im, (num_batch * height * width, channels))
        im_flat = topi.cast(im_flat, dtype)

        i_a = tvm.compute((dim3, channels),
                          lambda i, j: im_flat[idx_a[i], j],
                          name='Ia')
        i_b = tvm.compute((dim3, channels),
                          lambda i, j: im_flat[idx_b[i], j],
                          name='Ib')
        i_c = tvm.compute((dim3, channels),
                          lambda i, j: im_flat[idx_c[i], j],
                          name='Ic')
        i_d = tvm.compute((dim3, channels),
                          lambda i, j: im_flat[idx_d[i], j],
                          name='Id')

        x0_f = topi.cast(x_zero, dtype)
        x1_f = topi.cast(x_one, dtype)
        y0_f = topi.cast(y_zero, dtype)
        y1_f = topi.cast(y_one, dtype)
        w_a = topi.expand_dims(
            topi.multiply(topi.subtract(x1_f, x_temp),
                          topi.subtract(y1_f, y_temp)), 1)
        w_b = topi.expand_dims(
            topi.multiply(topi.subtract(x1_f, x_temp),
                          topi.subtract(y_temp, y0_f)), 1)
        w_c = topi.expand_dims(
            topi.multiply(topi.subtract(x_temp, x0_f),
                          topi.subtract(y1_f, y_temp)), 1)
        w_d = topi.expand_dims(
            topi.multiply(topi.subtract(x_temp, x0_f),
                          topi.subtract(y_temp, y0_f)), 1)

        output = topi.add(
            topi.add(
                topi.add(topi.multiply(w_a, i_a), topi.multiply(w_b, i_b)),
                topi.multiply(w_c, i_c)), topi.multiply(w_d, i_d))

        return output
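For reference, a compact NumPy version of the bilinear weighting assembled above (a sketch; `im_flat` stands for the flattened NHWC image, the `idx_*` arrays for the four gathered corner indices, and `x0`/`x1`/`y0`/`y1` for the clipped corner coordinates cast to floats):

import numpy as np

def bilinear_ref(im_flat, idx_a, idx_b, idx_c, idx_d, x, y, x0, x1, y0, y1):
    # weights of the four neighbouring pixels, expanded over the channel axis
    wa = ((x1 - x) * (y1 - y))[:, None]
    wb = ((x1 - x) * (y - y0))[:, None]
    wc = ((x - x0) * (y1 - y))[:, None]
    wd = ((x - x0) * (y - y0))[:, None]
    return (wa * im_flat[idx_a] + wb * im_flat[idx_b] +
            wc * im_flat[idx_c] + wd * im_flat[idx_d])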
Example #22
def compute_reshape(attrs, inputs, out_info):
    """Compute definition of reshape"""
    oshape = out_info[0].shape
    return topi.reshape(inputs[0], oshape)
Example #23
    (np.zeros, 'shape'),
    (np.ones, 'shape'),
    (np.zeros, 'shape'),
    (np.zeros, 'shape'),
    (np.random.normal, 'size'),
    (np.random.normal, 'size')
]

x = tvm.placeholder((batch_size, num_timesteps * num_input), 'float32')
y = tvm.placeholder((batch_size, num_classes), 'float32')
s = tvm.placeholder((batch_size, num_hidden), 'float32')
h = tvm.placeholder((batch_size, num_hidden), 'float32')

weights = [tvm.placeholder(x, 'float32', name="weights") for x in sizes]

xs = topi.split(topi.reshape(x, (batch_size, num_timesteps, num_input)), num_timesteps, axis=1)
xs = [topi.reshape(x, (batch_size, num_input)) for x in xs]
new_s = s
new_h = h

for i in range(num_timesteps):
    inp = topi.concatenate([xs[i], new_h], 1)
    g = topi.tanh(topi.matmul(inp, weights[0]) + weights[1])
    j = topi.sigmoid(topi.matmul(inp, weights[2]) + weights[3])
    f = topi.sigmoid(topi.matmul(inp, weights[4]) + weights[5])
    o = topi.sigmoid(topi.matmul(inp, weights[6]) + weights[7])

    new_s = new_s * f + g * j
    new_h = topi.tanh(new_s) * o

logits = topi.matmul(new_h, weights[8]) + weights[9]