Пример #1
0
def pad(data, pad_before, pad_after=None, pad_value=0.0, name="pad"):
    """Pad ``data`` on every axis and fill the new positions with ``pad_value``.

    Parameters
    ----------
    data : tensor-like
        Input whose ``shape`` and ``dtype`` are read and which is indexed
        with a tuple of indices.
    pad_before : sequence
        Padding added in front of each axis; must have one entry per axis.
    pad_after : sequence, optional
        Padding added behind each axis; must have one entry per axis.
        When omitted (``None``), ``pad_before`` is used for both sides.
    pad_value : number or tvm.expr.Expr
        Fill value. Anything that is not already a ``tvm.expr.Expr`` is
        wrapped in ``tvm.const`` with the dtype of ``data``.
    name : str
        Name forwarded to ``hcl.compute``.

    Returns
    -------
    The result of ``hcl.compute`` over the padded shape.

    Raises
    ------
    ValueError
        If ``pad_before`` or ``pad_after`` does not have one entry per
        axis of ``data``.
    """
    n = len(data.shape)
    # Fall back to pad_before only when pad_after was truly omitted. A
    # truthiness test would also swallow an explicit empty sequence (hiding
    # a length mismatch) and is ambiguous for array-like arguments.
    if pad_after is None:
        pad_after = pad_before
    if len(pad_before) != n:
        raise ValueError("Input dimension and pad_before dismatch : %d vs %d" %
                         (n, len(pad_before)))
    if len(pad_after) != n:
        raise ValueError("Input dimension and pad_after dismatch : %d vs %d" %
                         (n, len(pad_after)))
    # Each output extent is the input extent plus the padding on both sides.
    out_shape = tuple(
        tvm.ir_pass.Simplify((data.shape[i] +
                              tvm.const(pad_before[i] + pad_after[i])))
        for i in range(n))
    pad_value = pad_value if isinstance(
        pad_value, tvm.expr.Expr) else tvm.const(pad_value, data.dtype)

    def _pad(*indices):
        # Conditions that must all hold for an output index to map onto a
        # real input element; only padded axes contribute conditions.
        not_zero = []
        index_tuple = []
        for i in range(n):
            if pad_before[i] == 0 and pad_after[i] == 0:
                # Unpadded axis: the index passes through unchanged.
                index_tuple.append(indices[i])
            else:
                # Shift back into input coordinates and bound-check.
                index_tuple.append(indices[i] - pad_before[i])
                not_zero.append(indices[i] >= pad_before[i])
                not_zero.append(indices[i] < data.shape[i] + pad_before[i])
        if not_zero:
            not_zero = tvm.all(*not_zero)
            return tvm.select(not_zero, data[tuple(index_tuple)], pad_value)
        return data[tuple(index_tuple)]

    return hcl.compute(out_shape, _pad, name=name)
Пример #2
0
def avg_pool2d_nhwc(data,
                    pooling,
                    stride=None,
                    padding=None,
                    name='avg_pool2d'):
    """Average-pool a 4-D tensor whose shape unpacks as (batch, height, width, channel).

    The input is zero-padded on the height and width axes, then every
    output element is the sum over a ``pooling_h x pooling_w`` window
    divided by ``pooling_w * pooling_h``. The divisor is the full window
    size, so padded zeros count towards the average.

    Parameters
    ----------
    data : tensor-like
        Input with a 4-entry ``shape`` and a ``dtype``.
    pooling : sequence of 2
        Window size, unpacked as ``(pooling_h, pooling_w)``.
    stride : sequence of 2, optional
        Unpacked as ``(stride_h, stride_w)``. Defaults to ``[1, 1]``.
    padding : optional
        Forwarded to ``get_pad_tuple`` together with the window size.
        Defaults to ``[0, 0]``.
    name : str
        Name forwarded to ``hcl.compute``.

    Returns
    -------
    The result of ``hcl.compute`` with shape
    ``(batch, out_height, out_width, channel)``.
    """
    # Build the defaults per call rather than sharing one mutable list
    # object across all invocations via the signature.
    stride = [1, 1] if stride is None else stride
    padding = [0, 0] if padding is None else padding
    assert len(data.shape) == 4, "only support 4-dim pooling"
    assert len(stride) == 2, "only support 2-dim stride"
    pooling_h, pooling_w = pooling
    stride_h, stride_w = stride
    batch, height, width, channel = data.shape
    pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(
        padding, (pooling_h, pooling_w))
    # Only the two spatial axes are padded; batch and channel stay as-is.
    pad_before = [0, pad_top, pad_left, 0]
    pad_after = [0, pad_bottom, pad_right, 0]
    data = pad(data,
               pad_before,
               pad_after,
               pad_value=tvm.const(0.0, data.dtype))
    out_height = simplify((height - pooling_h + pad_top + pad_bottom) //
                          stride_h + 1)
    out_width = simplify((width - pooling_w + pad_left + pad_right) //
                         stride_w + 1)
    dheight = hcl.reduce_axis(0, pooling_h)
    dwidth = hcl.reduce_axis(0, pooling_w)
    # NOTE(review): `sum` is called with `axis=`, so it is presumably a
    # reducer defined elsewhere in this file, not the builtin - confirm.
    # NOTE(review): 'kernel_h'/'stride_h' read index 1 and 'kernel_w'/
    # 'stride_w' read index 0, although pooling/stride are unpacked as
    # (h, w) above. Left unchanged; confirm against the consumer of attrs.
    return hcl.compute(
        (batch, out_height, out_width, channel),
        lambda i, h, w, c: sum(
            data[i, h * stride_h + dheight, w * stride_w + dwidth, c],
            axis=[dheight, dwidth]) / (pooling_w * pooling_h),
        name=name,
        attrs=OrderedDict([('out_img_w', out_width), ('out_img_h', out_height),
                           ('in_num', channel), ('kernel_h', pooling[1]),
                           ('kernel_w', pooling[0]), ('stride_h', stride[1]),
                           ('stride_w', stride[0]),
                           ('app_name', tvm.make.StringImm('avg_pool'))]))
Пример #3
0
def pad(data, pad_before, pad_after=None, pad_value=0.0, name='pad'):
    """Pad ``data`` on every axis and fill the new positions with ``pad_value``.

    Parameters
    ----------
    data : tensor-like
        Input whose ``shape`` is read and which is indexed with a tuple
        of indices.
    pad_before : sequence
        Padding added in front of each axis; must have one entry per axis.
    pad_after : sequence, optional
        Padding added behind each axis; must have one entry per axis.
        When omitted (``None``), ``pad_before`` is used for both sides.
    pad_value
        Fill value, passed to ``tvm.select`` unchanged.
    name : str
        Name forwarded to ``hcl.compute``.

    Returns
    -------
    The result of ``hcl.compute`` over the padded shape.

    Raises
    ------
    ValueError
        If ``pad_before`` or ``pad_after`` does not have one entry per
        axis of ``data``.
    """
    n = len(data.shape)
    # Fall back to pad_before only when pad_after was truly omitted; a
    # truthiness test would silently replace an explicit empty sequence.
    if pad_after is None:
        pad_after = pad_before
    # Validate up front so a wrong-length pad list is reported clearly
    # instead of surfacing as an index error during shape construction.
    if len(pad_before) != n:
        raise ValueError("Input dimension and pad_before dismatch : %d vs %d" %
                         (n, len(pad_before)))
    if len(pad_after) != n:
        raise ValueError("Input dimension and pad_after dismatch : %d vs %d" %
                         (n, len(pad_after)))
    # Each output extent is the input extent plus the padding on both sides.
    out_shape = tuple(
        tvm.ir_pass.Simplify((data.shape[i] + tvm.const(pad_before[i]) +
                              tvm.const(pad_after[i]))) for i in range(n))

    def _pad(*indices):
        # Conditions that must all hold for an output index to map onto a
        # real input element; only padded axes contribute conditions.
        not_zero = []
        index_tuple = []
        for i in range(n):
            if equal_const_int(pad_before[i], 0) and equal_const_int(
                    pad_after[i], 0):
                # Unpadded axis: the index passes through unchanged.
                index_tuple.append(indices[i])
            else:
                # Shift back into input coordinates and bound-check.
                index_tuple.append(indices[i] - pad_before[i])
                not_zero.append(indices[i] >= pad_before[i])
                not_zero.append(indices[i] < data.shape[i] + pad_before[i])
        if not_zero:
            not_zero = tvm.all(*not_zero)
            return tvm.select(not_zero, data[tuple(index_tuple)], pad_value)
        return data[tuple(index_tuple)]

    return hcl.compute(out_shape, _pad, name=name)
Пример #4
0
def test_build_from_stmt():
    """Build a HeteroCL function from a hand-written IR statement and run it.

    The statement has two loops over 10 elements: the first stores
    ``A[i] + 1`` into an intermediate buffer ``C``, the second stores
    ``C[i] * X`` into ``B``. The result is checked against
    ``5 * (np_A + 1)`` with ``X = 5``.
    """
    hcl.init(hcl.Int())

    # HeteroCL inputs are still declared as placeholders; X is a scalar.
    A = hcl.placeholder((10,), "A")
    B = hcl.placeholder((10,), "B")
    X = hcl.placeholder((), "X")

    # --- Stage 1: C[i] = A[i] + 1 -------------------------------------
    # Raw IR variables take (name, data type). One is the loop variable,
    # the other stands for the intermediate tensor.
    idx_first = tvm._api_internal._Var("i", "int32")
    C = tvm._api_internal._Var("C", "int32")

    # HeteroCL inputs are accessed through ".buf.data". The trailing False
    # on the arithmetic node avoids automatic casting.
    incremented = tvm.make.Add(
        tvm.make.Load("int32", A.buf.data, idx_first), 1, False)

    # The intermediate tensor is written directly, without ".buf.data".
    # For the meaning of each For field, see IR.h under HalideIR/src/ir.
    fill_c = tvm.make.For(
        idx_first, 0, 10, 0, 0, tvm.make.Store(C, incremented, idx_first))

    # Memory for the intermediate tensor is allocated around its loop.
    alloc_c = tvm.make.Allocate(
        C, "int32", [10], tvm.const(1, "uint1"), fill_c, [])

    # --- Stage 2: B[i] = C[i] * X -------------------------------------
    # A loop variable cannot be shared between loops, so a new one is
    # created even though it carries the same name.
    idx_second = tvm._api_internal._Var("i", "int32")
    scaled = tvm.make.Mul(tvm.make.Load("int32", C, idx_second), X, False)
    fill_b = tvm.make.For(
        idx_second, 0, 10, 0, 0,
        tvm.make.Store(B.buf.data, scaled, idx_second))
    stmt = tvm.make.Block(alloc_c, fill_b)

    # Build through the regular HeteroCL API, supplying the hand-built
    # statement. With this approach no optimization primitives can be
    # applied.
    s = hcl.create_schedule([A, B, X])
    f = hcl.build(s, stmt=stmt)

    # Run on random data and compare with the NumPy reference.
    np_A = np.random.randint(10, size=10)
    np_B = np.random.randint(10, size=10)
    hcl_A = hcl.asarray(np_A)
    hcl_B = hcl.asarray(np_B)

    f(hcl_A, hcl_B, 5)

    expected = 5 * (np_A + 1)
    actual = hcl_B.asnumpy()

    assert np.array_equal(actual, expected)