Example 1
def compute_blockdim(shape, axis, dtype):
    # strategy: all shape dims except the reduce axes can be used for multicore
    blockdim_limit = 2 if utils.product_is_mini() else 32
    blockdim = 1
    if isinstance(shape, int):
        shape = [shape]
    if isinstance(axis, int):
        axis = [axis]
    elif not isinstance(axis, list):
        axis = list(axis)
    # normalize negative axes before sorting
    axis = sorted(a + len(shape) if a < 0 else a for a in axis)
    red_sh = 1
    if isinstance(shape, (list, tuple)):
        for i, sh in enumerate(shape):
            if not isinstance(sh, int):
                raise TypeError(
                    "Shape to compute blockdim must be a list/tuple of integers"
                )
            if i in axis:
                red_sh *= sh
            else:
                blockdim = blockdim * sh
    else:
        raise TypeError(
            "Shape to compute blockdim must be a list/tuple of integers")
    if red_sh < 32 / get_bytes(dtype):
        # when the reduce axis is too small, multicore may not always improve performance
        blockdim = 1

    return min(blockdim_limit, blockdim)
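For reference, a minimal self-contained sketch of the same strategy follows; the byte-size table and the blockdim limit are assumed stand-ins for get_bytes and utils.product_is_mini from the surrounding module.

from functools import reduce

_DTYPE_BYTES = {"float16": 2, "float32": 4, "int32": 4}  # assumed subset

def compute_blockdim_sketch(shape, axis, dtype, blockdim_limit=32):
    # normalize negative axes
    axis = [a + len(shape) if a < 0 else a for a in axis]
    # multicore splits over every axis that is not reduced
    blockdim = reduce(lambda x, y: x * y,
                      [sh for i, sh in enumerate(shape) if i not in axis], 1)
    red_sh = reduce(lambda x, y: x * y,
                    [sh for i, sh in enumerate(shape) if i in axis], 1)
    # a reduce extent smaller than one 32-byte block rarely benefits from multicore
    if red_sh < 32 / _DTYPE_BYTES[dtype]:
        blockdim = 1
    return min(blockdim_limit, blockdim)

print(compute_blockdim_sketch([64, 128, 16], [2], "float32"))  # 32 (capped by blockdim_limit)
print(compute_blockdim_sketch([64, 4], [1], "float32"))        # 1 (reduce axis too small)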
Example 2
def shape_dtype_max_size_check(shape, dtype):
    """check validation of tensor's shape."""
    if shape:
        for x in shape:
            if not isinstance(x, int):
                # skip the check when the shape contains non-integer (e.g. symbolic) dims
                return
        mul = get_bytes(dtype) * int(reduce(lambda x, y: int(x) * int(y), shape))
        if mul > MAX_DATA_SIZE:
            error_msg = "*".join([str(sh) for sh in shape])
            raise RuntimeError("Invalid shape, data is {} bytes ({}), which "
                               "exceed max data size {} bytes"
                               .format(mul, error_msg, MAX_DATA_SIZE))
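The arithmetic the guard performs can be checked by hand; the snippet below assumes MAX_DATA_SIZE is the 2 GiB runtime limit mentioned in example 3.

from functools import reduce

MAX_DATA_SIZE = 2 ** 31              # assumption: 2 GiB runtime limit
dtype_bytes = 4                      # float32
shape = (1024, 1024, 1024)
total = dtype_bytes * reduce(lambda x, y: int(x) * int(y), shape)
print(total, total > MAX_DATA_SIZE)  # 4294967296 True -> the check would raise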
Example 3
    def conv_shape_check(shape):
        if (not isinstance(shape, (tuple, list))) or (len(shape) != 4):
            raise RuntimeError("conv tensor shape should be 4d list or tuple")

        conv_dtype = "float16"
        size = get_bytes(conv_dtype)
        for i in shape:
            if (not isinstance(i, int)) or (i <= 0):
                raise RuntimeError("conv tensor shape should be 4d list or "
                                   "tuple of positive integer")
            size *= i

        if size > MAX_DATA_SIZE:
            raise RuntimeError("runtime can not support tensor more than 2G size")
Example 4
def get_input_pad_shape(shape, dtype):
    """Function for getting input pad shape."""
    pad_unit = ft_util.get_bytes(dtype, allow_none=True)

    if pad_unit is None:
        logging.warning(
            "%s is not supported in TensorAddPad, the result is undefined.",
            dtype)
        return shape

    lastdim = int(math.ceil(shape[-1] / pad_unit) * pad_unit)
    pad_shape = [*shape[:-1], '{},{}'.format(shape[-1], lastdim)
                 ] if lastdim != shape[-1] else shape

    return pad_shape
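A worked example of the padding rule, with get_bytes replaced by an assumed byte-size table (the real helper lives in ft_util):

import math

_DTYPE_BYTES = {"float16": 2, "float32": 4}  # assumed subset

def pad_last_dim(shape, dtype):
    pad_unit = _DTYPE_BYTES[dtype]
    lastdim = int(math.ceil(shape[-1] / pad_unit) * pad_unit)
    # the original encodes the last dim as a "logical,padded" string when padding is needed
    return [*shape[:-1], '{},{}'.format(shape[-1], lastdim)] if lastdim != shape[-1] else shape

print(pad_last_dim([8, 31], "float32"))  # [8, '31,32']
print(pad_last_dim([8, 32], "float32"))  # [8, 32] (already aligned)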
Example 5
def compute_blockdim(shape, axis, dtype):
    # strategy: all shape dims before the reduce axis can be used for multicore
    blockdim_limit = 2 if utils.product_is_mini() else 32
    blockdim = 1
    if isinstance(shape, int):
        shape = [shape]
    if axis < 0:
        axis += len(shape)
    if isinstance(shape, (list, tuple)):
        for i, sh in enumerate(shape):
            if not isinstance(sh, int):
                raise TypeError("Shape to compute blockdim must be a list/tuple of integer")
            if i == axis:
                if sh < 32 / get_bytes(dtype):
                    # when the reduce axis is too small, multicore may not always improve performance
                    blockdim = 1
                break
            blockdim = blockdim * sh
    else:
        raise TypeError("Shape to compute blockdim must be a list/tuple of integer")
    return min(blockdim_limit, blockdim)
Example 6
def four2five(data, format_, dst_dtype='float16', need_custom_tiling=True):
    """
    Convert 4-dim "data" to 5 dims; the format of "data" is defined by "format_".

    Args:
        data (tvm.tensor.Tensor): 4-dim tensor of type float16 or float32
        format_ (str): a str defining the format of "data"
        dst_dtype (str): a str defining the type of the output, either float16 or float32

    Returns:
        5-dim tvm.tensor.Tensor whose type is defined by dst_dtype and whose
        shape is [N, ceil(C / 16), H, W, 16], plus attrs describing the tiling args

    Raises:
        ValueError: If format_ is invalid.

    """
    # Check dtype
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    # Check shape
    shape = get_shape(data)
    vc_util.davinci_format_check(shape, format_, dim=4)

    # Check format
    if format_ not in ['NCHW', 'NHWC']:
        raise ValueError(
            "{} format is not support, four2five only support NCHW and NHWC format input"
            .format(format_))
    last_channel = 16
    if format_ == "NCHW":
        bs, c, h, w = get_shape(data)
    else:
        bs, h, w, c = get_shape(data)
    pad_c = c
    if c % last_channel != 0:
        pad_c = (c + last_channel - 1) // last_channel * last_channel
    c1 = pad_c // last_channel
    c0 = last_channel
    is_dynamic = ds.shape_is_dynamic(data)
    if not is_dynamic:
        attrs = get_attrs()
    else:
        attrs = get_dynamic_attrs()
    # Check size c when casting happens
    if data.dtype != dst_dtype and c0 * c1 >= C_LIMIT_FOR_CAST:
        raise ValueError(
            "When input and output data type is not matched, shape of 'c' axis should not exceed {}, "
            "while currently set is {}".format(C_LIMIT_FOR_CAST, c0 * c1))

    @script(capture=locals())
    def nchw_to_nc1hwc0_step(inputs, bs, c1, h, w, c0):
        output = allocate((bs, c1, h, c0, w), inputs.dtype, "local")
        for n_i in range(bs):
            for c_i in range(c1):
                for h_i in range(h):
                    for w_i in range(w):
                        for c_i0 in range(c0):
                            output[n_i, c_i, h_i, c_i0,
                                   w_i] = inputs[n_i,
                                                 c_i * last_channel + c_i0,
                                                 h_i, w_i]
        output1 = allocate((bs, c1, h, w, c0), inputs.dtype, "local")
        for n_i in range(bs):
            for c_i in range(c1):
                for h_i in range(h):
                    for w_i in range(w):
                        for c_i0 in range(c0):
                            output1[n_i, c_i, h_i, w_i,
                                    c_i0] = output[n_i, c_i, h_i, c_i0, w_i]
        return output1

    @script(capture=locals())
    def nchw_to_nc1hwc0(inputs, bs, c1, h, w, c0):
        output = allocate((bs, c1, h, w, c0), inputs.dtype, "local")
        for n_i in range(bs):
            for c_i in range(c1):
                for h_i in range(h):
                    for w_i in range(w):
                        for c_i0 in range(c0):
                            output[n_i, c_i, h_i, w_i,
                                   c_i0] = inputs[n_i,
                                                  c_i * last_channel + c_i0,
                                                  h_i, w_i]
        return output

    @script(capture=locals())
    def nhwc_to_nc1hwc0(inputs, zero, bs, c1, h, w, c0):
        output = allocate((bs, c1, h, w, c0), inputs.dtype, "local")
        for n_i in range(bs):
            for c_i in range(c1):
                for h_i in range(h):
                    for w_i in range(w):
                        for c_i0 in range(c0):
                            if c_i * last_channel + c_i0 < c:
                                output[n_i, c_i, h_i, w_i,
                                       c_i0] = inputs[n_i, h_i, w_i,
                                                      c_i * last_channel +
                                                      c_i0]
                            else:
                                output[n_i, c_i, h_i, w_i, c_i0] = zero

        return output

    cast_data = data
    need_cast = data.dtype == 'float32' and dst_dtype == 'float16'
    if c % last_channel != 0 or need_cast:
        expansion = int(ct_util.BLOCK_SIZE / get_bytes(data.dtype))
    else:
        expansion = None
    # float32 -> float16, need to cast before transform
    if need_cast:
        cast_data = akg.lang.cce.cast_to(data, dst_dtype)

    zero_ = akg.tvm.const(0.0, cast_data.dtype)
    if format_ == "NCHW":
        if c % last_channel != 0:
            pad_shape = [bs, pad_c, h, w]
            if h == 1 and w == 1:
                # if h and w are both 1, this is the pad-last-dim case
                output_shape = [bs, pad_c // last_channel, h, w, last_channel]

                output = akg.tvm.compute(
                    output_shape,
                    lambda i, c1, k, l, c0: akg.tvm.expr.Select(
                        c0 < c - c1 * last_channel, cast_data[
                            i, c1 * last_channel + c0, k, l],
                        akg.tvm.const(0, cast_data.dtype)),
                    name="output")
            else:
                # if the c dim needs padding, separate the transpose into two steps:
                # first nchw -> nc1hc0w, then nc1hc0w -> nc1hwc0
                pad_data = akg.tvm.compute(
                    pad_shape,
                    lambda i, j, k, l: akg.tvm.expr.Select(
                        j < c, cast_data[i, j, k, l], zero_),
                    name="pad_data")
                output = nchw_to_nc1hwc0_step(pad_data, to_tvm_const(bs),
                                              to_tvm_const(c1),
                                              to_tvm_const(h), to_tvm_const(w),
                                              to_tvm_const(c0))

        else:
            if not is_dynamic and data.dtype == "float16" and h * w % last_channel == 0 and h * w < 3600:
                output_shape = [bs, c1, h, w, c0]
                output = akg.tvm.compute(
                    output_shape,
                    lambda n, c1, h, w, c0: akg.lang.cce.four2five_nchw(
                        cast_data[n, c1 * last_channel + c0, h, w]),
                    name="output")

            else:
                output = nchw_to_nc1hwc0(cast_data, to_tvm_const(bs),
                                         to_tvm_const(c1), to_tvm_const(h),
                                         to_tvm_const(w), to_tvm_const(c0))

    else:
        if not is_dynamic and c < last_channel:
            rank = 5  # (n, c1, h, w, c0)
            pad_before = []
            pad_after = []
            for _ in range(rank):
                pad_before.append(0)
                pad_after.append(0)
            pad_after[-1] = last_channel - c
            # As c < last_channel, c1 is 1
            output = akg.tvm.compute(
                (bs, c1, h, w, c),
                lambda bs_i, _, h_i, w_i, c_i: cast_data[bs_i, h_i, w_i, c_i],
                name="output")
            output = tvm_pad(output,
                             pad_before,
                             pad_after=pad_after,
                             name='pad_output')
        else:
            output = nhwc_to_nc1hwc0(cast_data, zero_, to_tvm_const(bs),
                                     to_tvm_const(c1), to_tvm_const(h),
                                     to_tvm_const(w), to_tvm_const(c0))

    # float16 -> float32, need to cast after transform
    if data.dtype == 'float16' and dst_dtype == 'float32':
        output = akg.lang.cce.cast_to(output, dst_dtype)

    vc_util.davinci_format_check(output.shape, "NC1HWC0", dim=5)

    if not is_dynamic:
        dim_info, _ = four2five_set_dim_func(data, format_, dst_dtype)
        if dim_info != "":
            attrs["dim"] = dim_info
        if need_custom_tiling:
            attrs["custom_tiling"] = four2five_tiling_strategy(
                output, format_, expansion)
    elif need_custom_tiling:
        attrs["custom_tiling"] = four2five_tiling_strategy_dynamic(
            output, format_)

    if is_dynamic:
        attrs["enable_feature_library_pre_poly"] = True
    return output, attrs
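As a plain reference for what the NCHW branch computes, the following NumPy sketch pads C up to a multiple of 16 and splits it into (C1, C0); it is an assumed host-side equivalent, not the AKG kernel itself.

import numpy as np

def four2five_reference(x_nchw, last_channel=16):
    n, c, h, w = x_nchw.shape
    pad_c = (c + last_channel - 1) // last_channel * last_channel
    padded = np.zeros((n, pad_c, h, w), dtype=x_nchw.dtype)
    padded[:, :c] = x_nchw                      # pad channels with zeros
    c1 = pad_c // last_channel
    # split C into (C1, C0) and move C0 to the innermost position
    return padded.reshape(n, c1, last_channel, h, w).transpose(0, 1, 3, 4, 2)

x = np.random.rand(2, 20, 3, 3).astype("float16")
print(four2five_reference(x).shape)             # (2, 2, 3, 3, 16)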
Example 7
def gen_random_shape(shape_dim, slope=0, min_value=None, max_value=None):
    """
    Generate a list of random integers of length shape_dim, each within the range [min_value, max_value].

    Args:
        shape_dim : length of the output random shape
        slope : only describes the tendency of the random shape's values, not a mathematical slope;
                slope = -1 tends to generate a shape list with the largest value at the beginning and the smallest at the end
                slope = 0 tends to generate a shape list with nearly equal values
                slope = 1 tends to generate a shape list with the smallest value at the beginning and the largest at the end
    """
    if shape_dim <= 0:
        raise ValueError("Shape dim should be positive.")

    def _build_limit(limit, default):
        if limit is None:
            limit = default
        res = list()
        nonlocal shape_dim
        if isinstance(limit, (tuple, list)):
            if len(limit) != shape_dim:
                raise ValueError(
                    "Min/Max value should have the same length as shape_dim")
            res = limit
        elif isinstance(limit, int):
            res = [limit] * shape_dim
        else:
            raise TypeError(
                "Min/Max value should be an int or a list of ints with the same length as shape_dim"
            )
        return res

    device_limit = MAX_DATA_SIZE // get_bytes("float32")
    if max_value is None and shape_dim > 1:
        limit_avg = int(math.pow(device_limit, 1 / shape_dim))

        if slope == 0:
            max_value = [limit_avg] * shape_dim
        else:
            ratio = np.arange(-1 / 2, 1 / 2 + 1 / shape_dim, 1 / shape_dim)
            if len(ratio) > shape_dim:
                new_ratio = list()
                for i, r in enumerate(ratio):
                    if i == len(ratio) // 2 - 1:
                        new_ratio.append(0)
                    elif i == len(ratio) // 2:
                        continue
                    else:
                        new_ratio.append(r)
                ratio = new_ratio
            if slope == -1:
                ratio.reverse()
            max_value = list()
            for r in ratio:
                max_value.append(int((1 + r) * limit_avg))

    shape_min = _build_limit(min_value, 1)
    shape_extent = _build_limit(max_value, device_limit)
    random_shape = list()
    for mn, mx in zip(shape_min, shape_extent):
        random_shape.append(random.randint(mn, mx))
    return random_shape
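The per-dimension cap used when max_value is omitted can be sanity-checked on its own; device_limit here is an assumed illustrative value.

import math

device_limit = 2 ** 29               # assumption: MAX_DATA_SIZE // get_bytes("float32")
for shape_dim in (2, 3, 4):
    limit_avg = int(math.pow(device_limit, 1 / shape_dim))
    # a shape of shape_dim dims, each capped at limit_avg, stays within the device limit
    print(shape_dim, limit_avg, limit_avg ** shape_dim <= device_limit)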
Example 8
def five2four(data, shape4d, dst_type, format_):
    """
    Convert 5-dim "data" to 4 dims; the format of the output is defined by "format_".

    Args:
        data (tvm.tensor.Tensor): 5-dim tensor of type float16 or float32
        shape4d (Union[list, tuple]): a list of 4 integers, the shape of the output Tensor
        dst_type (str): data type of the output Tensor
        format_ (str): a str defining the format of the output, supports NCHW and NHWC

    Returns:
        4-dim tvm.tensor.Tensor.

    """
    vc_util.ops_dtype_check([data.dtype, dst_type],
                            vc_util.DtypeForDavinci.ALL_FLOAT)
    shape5d = get_shape(data)
    if not shape_is_dynamic(data):
        if len(shape5d) != 5 or shape5d[-1] != 16:
            raise ValueError(
                "five2four_cce only supports 5-dim data, and the last dim should be 16"
            )

    bs, c1, h, w, c0 = shape5d
    if not shape_is_dynamic(data):
        vc_util.davinci_format_check(shape5d, "NC1HWC0", dim=5)
    # Check format
    if format_ not in ['NCHW', 'NHWC']:
        raise ValueError(
            "{} format is not support, five2four only support NCHW and NHWC format input"
            .format(format_))
    if format_ == "NCHW":
        if shape_is_dynamic(data):
            shape4d = [bs, c1 * c0, h, w]
        _, c, h_4d, w_4d = shape4d
    else:
        if shape_is_dynamic(data):
            shape4d = [bs, h, w, c1 * c0]
        _, h_4d, w_4d, c = shape4d
    vc_util.davinci_format_check(shape4d, format_, dim=4)

    # Check whether shape4d and shape5d match
    if False not in [
            isinstance(s, (int, akg.tvm.expr.IntImm)) for s in shape5d
    ]:
        if h_4d != h or w_4d != w:
            raise ValueError(
                "five2four_cce's shape4d h and w should equal the data shape's h and w"
            )
        if c > c1 * c0 or c <= (c1 - 1) * c0:
            raise ValueError(
                "five2four_cce's shape4d c should be in the range ((c1 - 1) * c0, c1 * c0]"
            )

    # Check size c when casting happens
    if not shape_is_dynamic(data):
        if data.dtype != dst_type and c >= C_LIMIT_FOR_CAST:
            raise ValueError(
                "When the input and output data types do not match, the shape of the 'c' axis should not exceed {}, "
                "while it is currently {}".format(C_LIMIT_FOR_CAST, c))

    @script(capture=locals())
    def nc1hwc0_to_nhwc(inputs, bs, h, w, c, c1, c0):
        output = allocate((bs, h, w, c), inputs.dtype, "local")
        for n_i in range(bs):
            for h_i in range(h):
                for w_i in range(w):
                    for c_i in range(c1):
                        for c_i0 in range(c0):
                            output[n_i, h_i, w_i,
                                   c_i * c0 + c_i0] = inputs[n_i, c_i, h_i,
                                                             w_i, c_i0]
        return output

    @script(capture=locals())
    def nc1hwc0_to_nchw(inputs, bs, h, w, c, c1, c0):
        output = allocate((bs, c, h, w), inputs.dtype, "local")
        for n_i in range(bs):
            for c_i in range(c1):
                for h_i in range(h):
                    for w_i in range(w):
                        for c_i0 in range(c0):
                            output[n_i, c_i * c0 + c_i0, h_i,
                                   w_i] = inputs[n_i, c_i, h_i, w_i, c_i0]
        return output

    # if c % 16 == 0 and h and w are both 1, five2four is just a reshape operation
    if shape_is_dynamic(data):
        call_reshape = isinstance(h, int) and isinstance(
            w, int) and h == 1 and w == 1
    else:
        call_reshape = h == 1 and w == 1 and c % 16 == 0
    c_value = None
    expansion = None
    if format_ == "NHWC":
        if call_reshape:
            output = akg.topi.reshape(data, (bs, h, w, c))
            if shape_is_dynamic(data):
                output = akg.tvm.compute((bs, h, w, c),
                                         lambda *indice: output(*indice),
                                         name="reshape")
        elif c < c0:
            reshape_output = akg.topi.reshape(data, (bs, h, w, c0))
            output = akg.tvm.compute((bs, h, w, c),
                                     lambda *i: reshape_output(*i),
                                     name='slice_output')
        else:
            output = nc1hwc0_to_nhwc(data, to_tvm_const(bs), to_tvm_const(h),
                                     to_tvm_const(w), to_tvm_const(c),
                                     to_tvm_const(c1), to_tvm_const(c0))

    else:
        if call_reshape:
            output = akg.topi.reshape(data, (bs, c, h, w))
            if shape_is_dynamic(data):
                output = akg.tvm.compute((bs, c, h, w),
                                         lambda *indice: output(*indice),
                                         name="reshape")
        else:
            output = nc1hwc0_to_nchw(data, to_tvm_const(bs), to_tvm_const(h),
                                     to_tvm_const(w), to_tvm_const(c),
                                     to_tvm_const(c1), to_tvm_const(c0))

    # two special cases for tiling strategy
    if not shape_is_dynamic(data):
        if c < c0 or output.dtype != dst_type:
            c_value = c
        if c % c0 != 0 and output.dtype != dst_type:
            expansion = int(ct_util.BLOCK_SIZE / get_bytes(data.dtype))
    attrs = get_attrs()
    if not call_reshape:
        attrs["custom_tiling"] = five2four_tiling_strategy(
            data, c_value, expansion)

    if output.dtype != dst_type:
        output = akg.topi.cast(output, dst_type)
    return output, attrs
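For reference, an assumed NumPy equivalent of the NC1HWC0 -> NCHW path (the hybrid script nc1hwc0_to_nchw above is the on-device version):

import numpy as np

def five2four_reference(x_5d, c):
    n, c1, h, w, c0 = x_5d.shape
    # move C0 back next to C1, merge them, then drop the padded channels
    nchw = x_5d.transpose(0, 1, 4, 2, 3).reshape(n, c1 * c0, h, w)
    return nchw[:, :c]

x = np.random.rand(2, 2, 3, 3, 16).astype("float16")
print(five2four_reference(x, 20).shape)   # (2, 20, 3, 3)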