Example 1
import topi


def BaseImplementation(input_tensor, temp_tensor, get_input, layout, padding):
    # Deconvolution expressed as an ordinary convolution: flip the kernel
    # along both spatial axes and swap its (in_channel, out_channel) axes,
    temp_tensor = topi.flip(temp_tensor, axis=-1)
    temp_tensor = topi.flip(temp_tensor, axis=-2)
    temp_tensor = topi.transpose(temp_tensor, axes=(1, 0, 2, 3))
    # then run a stride-1, dilation-1 conv2d over the input prepared by
    # get_input() (typically dilated and re-padded by the caller).
    out_tensor = topi.nn.conv2d(get_input(input_tensor, temp_tensor, padding),
                                temp_tensor, (1, 1),
                                padding, (1, 1),
                                layout=layout,
                                out_dtype=input_tensor.dtype)
    return out_tensor
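
A minimal usage sketch (not part of the original module): the tensor shapes, the names data and kernel, and the pass-through get_input below are hypothetical, and a TVM 0.x layout with a top-level topi package is assumed.

import tvm
import topi

data = tvm.placeholder((1, 8, 16, 16), name="data")    # NCHW input
kernel = tvm.placeholder((8, 4, 3, 3), name="kernel")  # (in_c, out_c, kh, kw)

def get_input(input_tensor, temp_tensor, padding):
    # Hypothetical helper: a real caller would dilate and re-pad the input
    # here (see GetInput in Example 3); a stride-1 case can use it directly.
    return input_tensor

out = BaseImplementation(data, kernel, get_input, "NCHW", (1, 1))
print(out.shape)  # (1, 4, 16, 16)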
Example 2
import numpy as np
import tvm
import tvm.testing
import topi


# Compare topi.flip (plus an elementwise +1) against numpy.flip on every
# enabled target.
def verify_flip(in_shape, axis):
    A = tvm.placeholder(shape=in_shape, name="A")
    B = topi.flip(A, axis) + 1
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)

        foo = tvm.build(s, [A, B], device, name="reverse")
        x_np = np.random.uniform(size=in_shape).astype(A.dtype)
        out_npy = np.flip(x_np, axis) + 1
        data_nd = tvm.nd.array(x_np, ctx)
        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=A.dtype)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in ["llvm", "cuda", "opencl", "sdaccel", "aocl_sw_emu"]:
        check_device(device)
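
A typical driver simply sweeps verify_flip over a few shape/axis combinations; a minimal sketch (the shapes and axes below are illustrative):

def test_flip():
    # Flip a small 3-D tensor along each axis, including a negative index.
    verify_flip((3, 4, 3), 0)
    verify_flip((3, 4, 3), 1)
    verify_flip((3, 4, 3), 2)
    verify_flip((3, 4, 3), -1)

if __name__ == "__main__":
    test_flip()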
Example 3
def ConvVar(device="llvm", lib_path="./", optype=None,\
            ndim=None, layout=None, dtype=None, kernels=None,\
            strides=None, pad=None, dilations=None,\
            hasbias=None, activation_type=None,\
            config_entity=None, impl_dtype=None, channel_multiplier=None,\
            use_arm32=False, cfg=None):
    '''
    Build and export one of the convolution-family operators (convolution,
    depthwise convolution, deconvolution, depthwise deconvolution).

    Args:
        device: target passed to tvm.build, e.g. "llvm".
        lib_path: directory the generated library is written to.
        optype: one of 'Convolution', 'ConvolutionDepthwise',
            'Deconvolution', 'DeConvolutionDepthwise'.
        ndim: input dimensionality (only used in the generated op name).
        layout: data layout, e.g. 'NCHW'.
        dtype: dtype of the input and kernel placeholders.
        kernels: kernel size as (kh, kw).
        strides: strides as (stride_h, stride_w).
        pad: padding as (pad_up, pad_down, pad_left, pad_right).
        dilations: dilation factors.
        hasbias: whether a bias term is added to the output.
        activation_type: activation applied to the output (see TopiActivation).
        config_entity: configuration identifier, printed for logging.
        impl_dtype: dtype used for the computation; defaults to dtype.
        channel_multiplier: channel multiplier for the depthwise variants.
        use_arm32: if True, use a plain (untiled) schedule.
        cfg: optional tiling configuration providing 'VH', 'VW', 'VC',
            'tile_oh', 'tile_ow' and 'tile_co'.

    Returns:
        None; the scheduled operator is exported through Genlib.
    '''
    use_depthwise = optype == 'ConvolutionDepthwise'
    use_deconv = optype == 'Deconvolution'
    use_deconv_depthwise = optype == 'DeConvolutionDepthwise'
    has_bias = hasbias

    ow = 1 if cfg is None else cfg['VW']
    oh = 1 if cfg is None else cfg['VH']
    oc = 1 if cfg is None else cfg['VC']
    kh, kw = kernels
    op_name = "%s_ndim%d_%s_k%d_s%d_p%d%d%d%d_d%d_act%d_vc%d_vh%d_vw%d_hasbias%d" % ( \
              map_conv[optype], ndim, dtype, \
              kh, strides[0], pad[0], pad[1], pad[2], pad[3], dilations[0], \
              activation_enum_map[activation_type], oc, oh, ow, hasbias)
    batch = tvm.var("batch")
    in_channel = tvm.var("in_channel")
    in_height, in_width = tvm.var("in_height"), tvm.var("in_width")
    pad_up, pad_down, pad_left, pad_right = pad
    opname = op_name

    print("Conv", opname, config_entity)

    if impl_dtype is None:
        impl_dtype = dtype

    if use_depthwise:
        multiplier = channel_multiplier
        out_channel = in_channel * multiplier
    elif use_deconv_depthwise:
        multiplier = channel_multiplier
        out_channel = in_channel * multiplier
    else:
        out_channel = tvm.var("out_channel")

    # define placeholder
    input_tensor = in_tensor = tvm.placeholder(
        (batch, in_channel, in_height, in_width),
        dtype=dtype,
        name='in_tensor')

    if use_depthwise:
        temp_tensor = kernel_tensor = tvm.placeholder((in_channel, multiplier, kh, kw), dtype=dtype,\
                                                      name='kernel_tensor')
    elif use_deconv:
        temp_tensor = kernel_tensor = tvm.placeholder((in_channel, out_channel, kh, kw), dtype=dtype,\
                                                      name='kernel_tensor')
    elif use_deconv_depthwise:
        temp_tensor = kernel_tensor = tvm.placeholder((in_channel, multiplier, kh, kw), dtype=dtype,\
                                                      name='kernel_tensor')
    else:
        temp_tensor = kernel_tensor = tvm.placeholder((out_channel, in_channel, kh, kw), dtype=dtype,\
                                                      name='kernel_tensor')
    if has_bias:
        bias = tvm.placeholder((out_channel, ), dtype=dtype, name='bias')
        bias1 = topi.reshape(bias, (out_channel, 1, 1))

    if impl_dtype != dtype:
        input_tensor = AsType(input_tensor, impl_dtype)
        temp_tensor = AsType(temp_tensor, impl_dtype)
        if has_bias:
            bias1 = AsType(bias1, impl_dtype)

    # define compute & schedule
    if pad_up != pad_down or pad_left != pad_right:
        input_tensor = topi.nn.pad(input_tensor, [0, 0, pad_up, pad_left],
                                   [0, 0, pad_down, pad_right],
                                   name='data_pad')
        padding = 0, 0
    else:
        padding = pad_up, pad_left
    if use_depthwise:
        cfg1 = ((True, 1, 1, 1) if cfg is None else
                (True, cfg["tile_oh"], cfg["tile_ow"], cfg["tile_co"]))
        out_tensor = _depthwise_spatial_pack(cfg1, input_tensor, temp_tensor, strides, padding, dilations,\
                                             out_dtype=impl_dtype)
    elif use_deconv:

        def GetInput(input_tensor, temp_tensor, padding):
            _, out_c, filter_h, filter_w = temp_tensor.shape
            if out_c is None:
                print("temp_tensor.shape err")
            stride_h, stride_w = strides
            # dilate stage
            dilated_input = topi.nn.dilate(input_tensor,
                                           [1, 1, stride_h, stride_w],
                                           name='DilatedInput')
            # padding stage
            fpad_top, fpad_left, fpad_bottom, fpad_right = topi.nn.get_pad_tuple(
                padding, (filter_h, filter_w))
            bpad_top = filter_h - 1 - fpad_top
            bpad_bottom = filter_h - 1 - fpad_bottom
            bpad_left = filter_w - 1 - fpad_left
            bpad_right = filter_w - 1 - fpad_right
            padded_input = topi.nn.pad(dilated_input, \
                                      [0, 0, bpad_top, bpad_left], \
                                      [0, 0, bpad_bottom, bpad_right], \
                                      name='PaddedInput')
            return padded_input

        special_deconv = (kh == 2 and kw == 2
                          and strides[0] == 2 and strides[1] == 2)
        # special_deconv = False
        if special_deconv:
            out_tensor = OptimalOut(input_tensor, temp_tensor, in_channel)
        else:
            out_tensor = BaseImplementation(input_tensor, temp_tensor,
                                            GetInput, layout, padding)
    elif use_deconv_depthwise:

        def GetInput(input_tensor, temp_tensor, padding):
            _, out_c, filter_h, filter_w = temp_tensor.shape
            if out_c is None:
                print("temp_tensor.shape err")
            stride_h, stride_w = strides
            # dilate stage
            dilated_input = topi.nn.dilate(input_tensor,
                                           [1, 1, stride_h, stride_w],
                                           name='DilatedInput')
            # padding stage
            fpad_top, fpad_left, fpad_bottom, fpad_right = topi.nn.get_pad_tuple(
                padding, (filter_h, filter_w))
            bpad_top = filter_h - 1 - fpad_top
            bpad_bottom = filter_h - 1 - fpad_bottom
            bpad_left = filter_w - 1 - fpad_left
            bpad_right = filter_w - 1 - fpad_right
            padded_input = topi.nn.pad(dilated_input, \
                                      [0, 0, bpad_top, bpad_left], \
                                      [0, 0, bpad_bottom, bpad_right], \
                                      name='PaddedInput')
            return padded_input

        temp_tensor = topi.flip(temp_tensor, axis=-1)
        temp_tensor = topi.flip(temp_tensor, axis=-2)
        out_tensor = topi.nn.depthwise_conv2d_nchw(GetInput(input_tensor, temp_tensor, padding), temp_tensor, (1, 1), \
                                                   padding, (1, 1), out_dtype=input_tensor.dtype)
    else:
        cfg1 = ((True, 1, 1, 1) if cfg is None else
                (True, cfg["tile_oh"], cfg["tile_ow"], cfg["tile_co"]))
        out_tensor = _conv_spatial_pack_asm(cfg1, input_tensor, temp_tensor, strides, padding, dilations,\
                                            out_dtype=impl_dtype)

    if has_bias:
        out_tensor = tvm.compute(out_tensor.shape, lambda n, co, h, w: out_tensor[n, co, h, w] + bias1[co][0][0],\
                                 tag="injective")
    out_tensor = TopiActivation(out_tensor, activation_type)
    if impl_dtype != dtype:
        out_tensor = AsType(out_tensor, dtype)

    # create schedule
    if use_arm32:
        s = tvm.create_schedule(out_tensor.op)
    elif use_depthwise:
        s = schedule_depthwise_conv2d_nchw_arm(cfg, [out_tensor])
    elif use_deconv:
        if special_deconv:
            s = tvm.create_schedule([out_tensor.op])
        else:
            s = topi.generic.schedule_conv2d_nchw([out_tensor])
    elif use_deconv_depthwise:
        s = tvm.create_schedule([out_tensor.op])
    else:
        s = schedule_conv2d_nchw_arm_cpu([out_tensor])

    # generate lib
    attr = [
        batch, in_channel, in_height, in_width, out_channel, in_tensor,
        kernel_tensor
    ]
    tensor_list = [*attr, bias, out_tensor] if has_bias else [*attr, out_tensor]
    Genlib(s, tensor_list, device, opname, lib_path)
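
The GetInput helpers above implement the standard reduction of a deconvolution to a stride-1 convolution: dilate the input by the stride, then pad each border with kernel - 1 minus the forward padding. A small self-contained check of that arithmetic (plain Python; the function name and the sample sizes are illustrative, not from the original module):

def deconv_out_size(h, k, s, p):
    dilated = (h - 1) * s + 1      # size after topi.nn.dilate with factor s
    bpad = k - 1 - p               # back padding, as computed in GetInput
    padded = dilated + 2 * bpad    # size after topi.nn.pad
    return padded - k + 1          # stride-1 "valid" convolution

# Matches the usual transposed-convolution output size (h - 1) * s + k - 2 * p.
for h, k, s, p in [(8, 3, 2, 1), (16, 2, 2, 0), (7, 4, 3, 1)]:
    assert deconv_out_size(h, k, s, p) == (h - 1) * s + k - 2 * p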