Example #1
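A tiled 128x128x128 matrix multiply written with KgeN's TVM-style tensor expression API: the output is blocked into 16x16 tiles, the reduction loop is split by 16, and the operand tiles are staged through shared-memory caches.
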
import KgeN
from KgeN import te

M = 128
N = 128
K = 128

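# Declare the matmul: C[i, j] = sum over k of A[i, k] * B[k, j]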
A = te.placeholder((M, K), name="A")
B = te.placeholder((K, N), name="B")
k = te.reduce_axis(K, name="k")
C = te.compute((M, N),
               lambda i, j: te.reduce_sum(A[i, k] * B[k, j], axis=k),
               name="C")

s = te.create_schedule(C.op)

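# Stage copies of A and B in shared memory for reuse by C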
AA = s.cache_read(A, "shared", [C])
BB = s.cache_read(B, "shared", [C])

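# Fetch the loop axes (this rebinds M, N, K from matrix extents to
# iteration variables), split each loop by 16, and move the tile
# loops outermost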
M, N = s[C].op.axis
K, = C.reduce_axis
Mo, Mi = s[C].split(M, 16)
No, Ni = s[C].split(N, 16)
Ko, Ki = s[C].split(K, 16)
s[C].reorder(Mo, No, Ko, Mi, Ni, Ki)
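# Materialize the shared-memory copies inside the Ko loop, so each
# 16x16 tile of A and B is loaded once per reduction tile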
s[AA].compute_at(s[C], Ko)
s[BB].compute_at(s[C], Ko)
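# Lower the schedule to a function and print the generated code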
func = KgeN.lower(s, [A, B, C])
print(KgeN.build(func))
Example #2
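A direct 2D convolution in the same style: the input (laid out [y, x, channel, batch]) is zero-padded, and each output element reduces over the kernel window and the input channels. The original snippet omits its import and shape parameters; the preamble below is an assumed, self-contained setup (a common 3x3, stride-1, pad-1 configuration), not part of the original.

from KgeN import te

# Assumed shape parameters (left undefined in the original snippet)
batch = 256
in_channel = 256
out_channel = 512
in_size = 14
kernel = 3
pad = 1
stride = 1
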
A = te.placeholder((in_size, in_size, in_channel, batch), name="A")
W = te.placeholder((kernel, kernel, in_channel, out_channel), name="W")
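# Output spatial extent of the padded, strided convolution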
out_size = (in_size - kernel + 2 * pad) // stride + 1
# Zero-pad the input spatially by `pad` on each side
Apad = te.compute(
    (in_size + 2 * pad, in_size + 2 * pad, in_channel, batch),
    lambda yy, xx, cc, nn: te.if_then_else(
        te.all(yy >= pad, yy - pad < in_size, xx >= pad, xx - pad < in_size),
        A[yy - pad, xx - pad, cc, nn],
        0,
    ),
    name="Apad",
)
# Create reduction variables
rc = te.reduce_axis(in_channel, name="rc")
ry = te.reduce_axis(kernel, name="ry")
rx = te.reduce_axis(kernel, name="rx")
# Compute the convolution
B = te.compute(
    (out_size, out_size, out_channel, batch),
    lambda yy, xx, ff, nn: te.reduce_sum(
        Apad[yy * stride + ry, xx * stride + rx, rc, nn] * W[ry, rx, rc, ff], axis=(ry, rx, rc)
    ),
    name="B",
)


# Create a default schedule for the convolution
s = te.create_schedule(B.op)