Пример #1
0
    def _get_pixel(n, c, y, x):
        """Read the input element that feeds output position (n, c, y, x).

        The output channel ``c`` is split by ``in_c``: the remainder is the
        source channel and the quotient is the offset inside a block. That
        offset is split again by ``block_size`` into a row offset (quotient)
        and a column offset (remainder).

        ``in_c``, ``block_size``, ``layout`` and ``data`` are free variables
        defined outside this block.
        """
        offset_in_block = tvm.truncdiv(c, in_c)
        src_channel = tvm.truncmod(c, in_c)
        col_offset = tvm.truncmod(offset_in_block, block_size)
        row_offset = tvm.truncdiv(offset_in_block, block_size)

        # Source coordinates: offset inside the block plus the block origin.
        src_row = row_offset + (y * block_size)
        src_col = col_offset + (x * block_size)

        if layout == 'NCHW':
            return data(n, src_channel, src_row, src_col)
        # Every other layout value is read with the channel axis last.
        return data(n, src_row, src_col, src_channel)
Пример #2
0
    def _get_pixel(n, c, y, x):
        """Read the input element that feeds output position (n, c, y, x).

        The spatial coordinates are split by ``block_size`` into a block
        coordinate (quotient) and a position inside the block (remainder).
        The position inside the block, combined with ``c``, selects the
        source channel; how they are combined depends on ``mode``.

        ``block_size``, ``mode``, ``channel_factor``, ``layout`` and ``data``
        are free variables defined outside this block.
        """
        src_col = tvm.truncdiv(x, block_size)
        src_row = tvm.truncdiv(y, block_size)
        col_in_block = tvm.truncmod(x, block_size)
        row_in_block = tvm.truncmod(y, block_size)

        # Row-major linear position of the pixel inside its block.
        pos_in_block = (block_size * row_in_block) + col_in_block

        if mode == "DCR":
            src_channel = channel_factor * pos_in_block + c
        else:
            # Any mode other than "DCR" uses the sequential channel order.
            src_channel = (c * block_size * block_size) + pos_in_block

        if layout == 'NCHW':
            return data(n, src_channel, src_row, src_col)
        # Every other layout value is read with the channel axis last.
        return data(n, src_row, src_col, src_channel)
def test_everything_during_deduction():
    """Check that the guard survives LoopPartition as an IfThenElse.

    The ``likely`` condition divides the outer loop variable by the inner
    one; after partitioning and simplification the innermost body is
    expected to still be an ``IfThenElse`` node.
    """
    m = tvm.size_var('m')
    n = tvm.size_var('n')
    irb = tvm.ir_builder.create()
    with irb.for_range(0, n, 'i') as i, irb.for_range(0, 32, 'j') as j:
        # this guard will produce everything during deduction
        guard = irb.likely(tvm.truncdiv(i, j) < m)
        with irb.if_scope(guard):
            irb.emit(tvm.make.Evaluate(m))

    partitioned = tvm.ir_pass.LoopPartition(irb.get(), False)
    simplified = tvm.ir_pass.Simplify(partitioned)
    assert isinstance(simplified.body.body, tvm.stmt.IfThenElse)
def test_condition():
    """Check that no ``Select`` remains in the first partitioned statement.

    A ``Select`` guarded by a ``likely`` bound check is emitted inside a
    two-level loop nest; after LoopPartition and Simplify, ``stmt[0]`` must
    not contain any ``tvm.expr.Select`` node.
    """
    irb = tvm.ir_builder.create()
    m = tvm.size_var('m')
    n = tvm.size_var('n')

    outer_extent = tvm.truncdiv(n + 3, 4)
    with irb.for_range(0, outer_extent, 'i') as i:
        with irb.for_range(0, 4, 'j') as j:
            in_bounds = irb.likely(i * 4 + j < n)
            selected = tvm.make.Select(in_bounds, m, n)
            irb.emit(tvm.make.Evaluate(selected))

    partitioned = tvm.ir_pass.LoopPartition(irb.get(), False)
    simplified = tvm.ir_pass.Simplify(partitioned)

    select_hits = collect_visit(
        simplified[0], lambda node: isinstance(node, tvm.expr.Select))
    assert not any(select_hits)
Пример #5
0
def space_to_depth(data, block_size, layout='NCHW'):
    """Move spatial blocks of the input into the channel dimension.

    Parameters
    ----------
    data : tvm.Tensor
        4-D tensor in either NCHW or NHWC layout.

    block_size : int
        Edge length of the spatial blocks folded into the channel dimension.

    layout : string
        Either NCHW or NHWC, indicating data layout.

    Returns
    -------
    output : tvm.Tensor
        For NCHW, shape [N, C * block_size**2, H / block_size, W / block_size];
        for NHWC, shape [N, H / block_size, W / block_size, C * block_size**2].

    Raises
    ------
    ValueError
        If ``layout`` is neither 'NCHW' nor 'NHWC'.
    """
    if layout not in ('NCHW', 'NHWC'):
        raise ValueError("Only NCHW and NHWC layouts are currently supported.")

    channels_first = layout == 'NCHW'
    if channels_first:
        in_n, in_c, in_h, in_w = data.shape
    else:
        in_n, in_h, in_w, in_c = data.shape

    out_c = in_c * block_size * block_size
    out_h = tvm.truncdiv(in_h, block_size)
    out_w = tvm.truncdiv(in_w, block_size)
    if channels_first:
        output_shape = [in_n, out_c, out_h, out_w]
    else:
        output_shape = [in_n, out_h, out_w, out_c]

    def _compute(*indices):
        # Normalise the output index to (n, c, y, x) whatever the layout.
        if channels_first:
            n, c, y, x = indices
        else:
            n, y, x, c = indices

        # The output channel encodes both the source channel (remainder)
        # and the position inside the block (quotient).
        offset_in_block = tvm.truncdiv(c, in_c)
        src_channel = tvm.truncmod(c, in_c)
        col_offset = tvm.truncmod(offset_in_block, block_size)
        row_offset = tvm.truncdiv(offset_in_block, block_size)

        src_row = row_offset + (y * block_size)
        src_col = col_offset + (x * block_size)

        if channels_first:
            return data(n, src_channel, src_row, src_col)
        return data(n, src_row, src_col, src_channel)

    return tvm.compute(
        output_shape, _compute, name='space_to_depth', tag=tag.INJECTIVE)
Пример #6
0
def depth_to_space(data, block_size, layout='NCHW', mode='DCR'):
    """Move groups of channels of the input into spatial blocks.

    Parameters
    ----------
    data : tvm.Tensor
        4-D tensor in either NCHW or NHWC layout.

    block_size : int
        Edge length of the spatial blocks built from the channel dimension.

    layout : string
        Either NCHW or NHWC, indicating data layout.

    mode : string
        Either DCR or CDR, indicates how channels should be accessed.
        In DCR, channels are interwoven in the Tensorflow style while
        in CDR channels are accessed sequentially as in Pytorch.
        Any value other than "DCR" is handled as CDR.

    Returns
    -------
    output : tvm.Tensor
        For NCHW, shape [N, C / block_size**2, H * block_size, W * block_size];
        for NHWC, shape [N, H * block_size, W * block_size, C / block_size**2].

    Raises
    ------
    ValueError
        If ``layout`` is neither 'NCHW' nor 'NHWC'.
    """
    if layout not in ('NCHW', 'NHWC'):
        raise ValueError("Only NCHW and NHWC layouts are currently supported.")

    channels_first = layout == 'NCHW'
    if channels_first:
        in_n, in_c, in_h, in_w = data.shape
    else:
        in_n, in_h, in_w, in_c = data.shape

    # Number of output channels: input channels per block position.
    channel_factor = tvm.truncdiv(in_c, (block_size * block_size))
    out_h = in_h * block_size
    out_w = in_w * block_size
    if channels_first:
        output_shape = [in_n, channel_factor, out_h, out_w]
    else:
        output_shape = [in_n, out_h, out_w, channel_factor]

    def _compute(*indices):
        # Normalise the output index to (n, c, y, x) whatever the layout.
        if channels_first:
            n, c, y, x = indices
        else:
            n, y, x, c = indices

        src_col = tvm.truncdiv(x, block_size)
        src_row = tvm.truncdiv(y, block_size)
        col_in_block = tvm.truncmod(x, block_size)
        row_in_block = tvm.truncmod(y, block_size)

        # Row-major linear position of the pixel inside its block.
        pos_in_block = (block_size * row_in_block) + col_in_block

        if mode == "DCR":
            src_channel = channel_factor * pos_in_block + c
        else:
            src_channel = (c * block_size * block_size) + pos_in_block

        if channels_first:
            return data(n, src_channel, src_row, src_col)
        return data(n, src_row, src_col, src_channel)

    return tvm.compute(
        output_shape, _compute, name='depth_to_space', tag=tag.INJECTIVE)
Пример #7
0
 def func2():
     """Return ``exp`` of ``(x + y + 1) * y`` truncating-divided by 4.

     ``x`` and ``y`` are free variables resolved from the surrounding scope.
     """
     numerator = (x + y + 1) * y
     return tvm.exp(tvm.truncdiv(numerator, 4))