def test_conv2D_lb():
    """Run a 3x3 window sum with ``reuse_at`` on the outer axis and check it.

    ``B[y, x]`` sums ``A[y + r, x + c]`` over both reduction axes, and the
    schedule applies ``reuse_at`` to ``A`` along ``B.axis[0]``.  The built
    function is executed on random integers and compared with a NumPy
    reference.
    """
    hcl.init()
    A = hcl.placeholder((10, 10))
    r = hcl.reduce_axis(0, 3)
    c = hcl.reduce_axis(0, 3)
    B = hcl.compute(
        (8, 8),
        lambda y, x: hcl.sum(A[y + r, x + c], axis=[r, c]))
    s = hcl.create_schedule([A, B])
    line_buffer = s.reuse_at(A, s[B], B.axis[0])
    f = hcl.build(s)

    image = np.random.randint(0, 10, size=(10, 10))
    result = np.zeros((8, 8), dtype="int")
    golden = np.zeros((8, 8), dtype="int")
    # Summing the nine shifted 8x8 views yields the same totals as the
    # per-pixel window loop.
    for dy in range(3):
        for dx in range(3):
            golden += image[dy:dy + 8, dx:dx + 8]

    hcl_A = hcl.asarray(image)
    hcl_B = hcl.asarray(result)
    f(hcl_A, hcl_B)

    assert np.array_equal(hcl_B.asnumpy(), golden)
def test_reuse_blur_x():
    """Check a 3-tap horizontal sum scheduled with ``reuse_at`` on the x axis.

    ``B[y, x] = A[y, x] + A[y, x + 1] + A[y, x + 2]``; ``reuse_at`` is applied
    to ``A`` along ``B.axis[1]``.  The result on random integers must match
    the same sum computed with NumPy.
    """
    hcl.init()
    A = hcl.placeholder((10, 10), name="A")
    B = hcl.compute(
        (10, 8),
        lambda y, x: A[y, x] + A[y, x + 1] + A[y, x + 2])
    s = hcl.create_schedule([A, B])
    reuse_buffer = s.reuse_at(A, s[B], B.axis[1])
    f = hcl.build(s)

    image = np.random.randint(0, 10, size=(10, 10))
    result = np.zeros((10, 8), dtype="int")
    golden = np.zeros((10, 8), dtype="int")
    # Three column-shifted views added together give every output pixel.
    golden[:, :] = image[:, 0:8] + image[:, 1:9] + image[:, 2:10]

    hcl_A = hcl.asarray(image)
    hcl_B = hcl.asarray(result)
    f(hcl_A, hcl_B)

    assert np.array_equal(hcl_B.asnumpy(), golden)
def test_module_args_dtype():
    """Check an ``hcl.def_`` module whose arguments carry explicit dtypes.

    The module ``add`` declares its first argument as ``UInt(2)``; the test
    expects each output element to equal ``a[i] % 4 + b[i]``.
    """
    hcl.init()

    def algorithm(A, B):

        @hcl.def_(
            [A.shape, B.shape, ()],
            [hcl.UInt(2), hcl.Int(32), hcl.Int(32)])
        def add(A, B, x):
            hcl.return_(A[x] + B[x])

        return hcl.compute(A.shape, lambda x: add(A, B, x), "C")

    A = hcl.placeholder((10, ), dtype=hcl.UInt(2))
    B = hcl.placeholder((10, ))
    s = hcl.create_schedule([A, B], algorithm)
    f = hcl.build(s)

    a = np.random.randint(100, size=(10, ))
    b = np.random.randint(100, size=(10, ))
    c = np.zeros(10)

    _A = hcl.asarray(a, hcl.UInt(2))
    _B = hcl.asarray(b)
    _C = hcl.asarray(c)
    f(_A, _B, _C)

    _A = _A.asnumpy()
    _B = _B.asnumpy()
    _C = _C.asnumpy()

    for idx in range(10):
        expected = a[idx] % 4 + b[idx]
        assert _C[idx] == expected
def test_pack():
    """Check ``hcl.pack`` with ``factor=4`` on a 1-D tensor for several widths.

    For every packed width ``i`` in 4, 8, ..., 32 the 40 input elements of
    width ``i // 4`` are packed into 10 elements of width ``i``; element ``k``
    of each group is expected at bit offset ``k * (i // 4)``.
    """

    def pack(A):
        return hcl.pack(A, factor=4)

    for i in range(4, 36, 4):
        lane_bits = i // 4
        A = hcl.placeholder((40, ), "A", dtype=hcl.UInt(lane_bits))
        s = hcl.create_schedule([A], pack)
        f = hcl.build(s)

        _A = hcl.asarray(
            np.random.randint(1000, size=(40, )), dtype=hcl.UInt(lane_bits))
        _B = hcl.asarray(np.zeros(10), dtype=hcl.UInt(i))
        f(_A, _B)

        narrow = _A.asnumpy()
        packed = _B.asnumpy()
        for j in range(10):
            # i is a multiple of 4, so k * i // 4 == k * lane_bits.
            golden = sum(
                narrow[j * 4 + k] << (k * lane_bits) for k in range(4))
            assert packed[j] == golden
def test_llvm_(length):
    """Check ``hlib.ip.byte_swap_rtl`` followed by ``+ 1`` on the default build.

    The reference swaps the two 16-bit halves of each input value (masked to
    32 bits) and adds one.

    Args:
        length: number of elements in the 1-D input vector.
    """
    hcl.init(hcl.UInt(32))
    input_vec = hcl.placeholder((length, ), name="input")

    # assume gsize = lsize = 1
    def math_func(input_vec):
        new_vec = hlib.ip.byte_swap_rtl(input_vec)
        return hcl.compute(
            input_vec.shape, lambda *args: new_vec[args] + 1, name="ret")

    s = hcl.create_schedule([input_vec], math_func)

    x_np = np.random.randint(low=2**16, high=2**20, size=length)
    mask = (1 << 32) - 1
    swapped = np.bitwise_and(mask, np.bitwise_or(x_np << 16, x_np >> 16))
    # Store into a float buffer first, then add one, like the scalar loop did.
    y_np = np.zeros((length))
    y_np[:] = swapped
    y_np += 1

    f = hcl.build(s)
    x_hcl = hcl.asarray(x_np)
    y_hcl = hcl.asarray(np.zeros((length)))
    f(x_hcl, y_hcl)

    np.testing.assert_array_equal(y_np, y_hcl.asnumpy())
def test_mutate_complex():
    """Check ``hcl.mutate`` with a body that contains a loop and a condition.

    For each row ``x`` the kernel increments ``B[x]`` once for every
    ``A[x][y] > 5``; the expected value is the count of such elements per row.
    """

    def kernel(A, B):

        def foo(x):
            with hcl.for_(0, 10) as y:
                with hcl.if_(A[x][y] > 5):
                    B[x] += 1

        hcl.mutate((10, ), foo)

    A = hcl.placeholder((10, 10))
    B = hcl.placeholder((10, ))
    s = hcl.create_schedule([A, B], kernel)
    f = hcl.build(s)

    np_A = numpy.random.randint(10, size=(10, 10))
    np_B = numpy.zeros((10, ))
    # One count per row of how many entries exceed 5.
    gold_B = [len([v for v in np_A[row] if v > 5]) for row in range(10)]

    hcl_A = hcl.asarray(np_A)
    hcl_B = hcl.asarray(np_B, dtype=hcl.Int(32))
    f(hcl_A, hcl_B)

    ret_B = hcl_B.asnumpy()
    for row in range(10):
        assert ret_B[row] == gold_B[row]
def test_unpack_dtype():
    """Check ``hcl.unpack`` when the target width comes from ``dtype=``.

    For every source width ``i`` in 4, 8, ..., 32, each of the 10 inputs is
    unpacked into 4 fields of ``i // 4`` bits; field ``k`` is expected to be
    bits ``[k * (i // 4), (k + 1) * (i // 4))`` of the source value.
    """

    def unpack(A, B):
        C = hcl.unpack(A, name="C", dtype=B.dtype)
        hcl.update(B, lambda x: C[x])

    for i in range(4, 36, 4):
        field_bits = i // 4
        A = hcl.placeholder((10, ), "A", dtype=hcl.UInt(i))
        B = hcl.placeholder((40, ), "B", dtype=hcl.UInt(field_bits))
        s = hcl.create_schedule([A, B], unpack)
        f = hcl.build(s)

        _A = hcl.asarray(
            np.random.randint(1000, size=(10, )), dtype=hcl.UInt(i))
        _B = hcl.asarray(np.zeros(40), dtype=hcl.UInt(field_bits))
        f(_A, _B)

        wide = _A.asnumpy()
        fields = _B.asnumpy()
        for j in range(10):
            for k in range(4):
                source = wide[j]
                got = fields[j * 4 + k]
                golden = (source >> (field_bits * k)) % (1 << field_bits)
                assert got == golden
def test_get_slice_tensor_reverse():
    """Check reading the bit slice ``A[x][0:8]`` of each tensor element.

    The expected output is the low byte of the input with its bit order
    reversed: the MSB-first bits of the result must equal the flipped
    MSB-first bits of ``np_A & 0xFF``.
    """
    hcl.init()

    def kernel(A):
        return hcl.compute(A.shape, lambda x: A[x][0:8])

    A = hcl.placeholder((10, ))
    s = hcl.create_schedule(A, kernel)
    f = hcl.build(s)

    np_A = np.random.randint(10, size=(10, ))
    np_B = np.zeros(10)
    golden = (np_A & 0xFF).astype('uint8')

    hcl_A = hcl.asarray(np_A)
    hcl_B = hcl.asarray(np_B)
    f(hcl_A, hcl_B)

    ret = hcl_B.asnumpy().astype('uint8')
    for idx in range(10):
        want_bits = np.flip(np.unpackbits(golden[idx]))
        got_bits = np.unpackbits(ret[idx])
        assert np.array_equal(want_bits, got_bits)
def test_set_slice_tensor_reverse():
    """Check writing the bit slice ``B[i][0:8] = A[i]`` for each element.

    After the kernel runs, the MSB-first bits of every ``B[i]`` must equal
    the flipped MSB-first bits of ``A[i]``.
    """
    hcl.init(hcl.UInt(8))

    def kernel(A, B):
        with hcl.for_(0, 10) as i:
            B[i][0:8] = A[i]

    A = hcl.placeholder((10, ))
    B = hcl.placeholder((10, ))
    s = hcl.create_schedule([A, B], kernel)
    f = hcl.build(s)

    # NOTE(review): randint(1, ...) samples from [0, 1), so np_A is all
    # zeros and the bit-order comparison below cannot tell a reversed write
    # from a plain one -- confirm whether a wider range was intended.
    np_A = np.random.randint(1, size=(10, )).astype('uint8')
    np_B = np.random.randint(10, size=(10, )).astype('uint8')

    hcl_A = hcl.asarray(np_A)
    hcl_B = hcl.asarray(np_B)
    f(hcl_A, hcl_B)

    ret = hcl_B.asnumpy().astype('uint8')
    for idx in range(10):
        want_bits = np.flip(np.unpackbits(np_A[idx]))
        got_bits = np.unpackbits(ret[idx])
        assert np.array_equal(want_bits, got_bits)
def test_mixed_stream():
    """Check a three-stage kernel with host/device moves and a stage stream.

    Returns early when ``vivado_hls`` is not found on the PATH.  Otherwise
    ``A`` and ``B`` are moved to the accelerator, ``D`` back to the host and
    ``C`` is streamed into stage ``D``; the csim result must equal
    ``(A + B) * 6``.
    """
    tool_missing = os.system("which vivado_hls >> /dev/null") != 0
    if tool_missing:
        return

    A = hcl.placeholder((10, 32), "A")
    B = hcl.placeholder((10, 32), "B")

    def kernel(A, B):
        C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j], "C")
        D = hcl.compute(C.shape, lambda i, j: C[i][j] * 2, "D")
        E = hcl.compute(C.shape, lambda i, j: D[i][j] * 3, "E")
        return E

    target = hcl.platform.aws_f1
    s = hcl.create_schedule([A, B], kernel)

    # Data placement and streaming directives.
    s.to([A, B], target.xcel)
    s.to(kernel.D, target.host)
    s.to(kernel.C, s[kernel.D])

    target.config(compile="vivado_hls", mode="csim")
    f = hcl.build(s, target)

    lhs = np.random.randint(10, size=(10, 32))
    rhs = np.random.randint(10, size=(10, 32))
    out = np.zeros((10, 32))

    hcl_A = hcl.asarray(lhs)
    hcl_B = hcl.asarray(rhs)
    hcl_C = hcl.asarray(out, dtype=hcl.Int(32))
    f(hcl_A, hcl_B, hcl_C)

    np.testing.assert_array_equal(hcl_C.asnumpy(), (lhs + rhs) * 6)
def partition_test():
    """Build and run a kernel whose device-side input is block-partitioned.

    Returns early when ``vivado_hls`` is not found on the PATH.  The function
    only executes the csim build; it makes no assertion on the output.
    """
    tool_missing = os.system("which vivado_hls >> /dev/null") != 0
    if tool_missing:
        return

    A = hcl.placeholder((10, 10), "A", dtype=hcl.UInt(8))

    def kernel(A):
        B = hcl.compute(
            A.shape, lambda *args: A[args] + 1, "B", dtype=hcl.UInt(8))
        return B

    target = hcl.platform.zc706
    s = hcl.create_schedule([A], kernel)
    s.to(kernel.B, target.host)
    A_ = s.to(A, target.xcel)
    # Partition the tensor returned by the move to the accelerator.
    s.partition(A_, hcl.Partition.Block, dim=1, factor=2)

    target.config(compile="vivado_hls", mode="csim")
    f = hcl.build(s, target)

    host_in = np.random.randint(10, size=(10, 10))
    host_out = np.zeros((10, 10))
    hcl_A = hcl.asarray(host_in, dtype=hcl.UInt(8))
    hcl_B = hcl.asarray(host_out, dtype=hcl.UInt(8))
    f(hcl_A, hcl_B)

    ret_B = hcl_B.asnumpy()
def test_hls(target_mode):
    """Build a three-stage kernel for ``vivado_hls`` in the given mode.

    Args:
        target_mode: mode string passed to ``target.config``.  When it
            contains ``"csyn"`` the synthesis report is checked for
            ``"ReportVersion"``; otherwise, when it contains ``"csim"``, the
            output must equal ``(A + 2) * 2``.
    """
    hcl.init()
    A = hcl.placeholder((10, 32), "A")

    def kernel(A):
        B = hcl.compute(A.shape, lambda *args: A[args] + 1, "B")
        C = hcl.compute(A.shape, lambda *args: B[args] + 1, "C")
        D = hcl.compute(A.shape, lambda *args: C[args] * 2, "D")
        return D

    target = hcl.platform.aws_f1
    s = hcl.create_schedule([A], kernel)
    s.to(kernel.B, target.xcel)
    s.to(kernel.C, target.host)
    target.config(compile="vivado_hls", mode=target_mode)
    f = hcl.build(s, target)

    host_in = np.random.randint(10, size=(10, 32))
    host_out = np.zeros((10, 32))
    hcl_A = hcl.asarray(host_in)
    hcl_B = hcl.asarray(host_out, dtype=hcl.Int(32))
    f(hcl_A, hcl_B)
    ret_B = hcl_B.asnumpy()

    if "csyn" in target_mode:
        report = f.report("csyn")
        assert "ReportVersion" in report
    elif "csim" in target_mode:
        np.testing.assert_array_equal(ret_B, (host_in + 2) * 2)
def test_intel_aocl():
    """Check a three-stage kernel built for ``aocl`` in ``sw_sim`` mode.

    Returns early when ``aocl`` is not found on the PATH.  The output must
    equal ``(A + 2) * 2``.
    """
    tool_missing = os.system("which aocl >> /dev/null") != 0
    if tool_missing:
        return

    hcl.init()
    A = hcl.placeholder((10, 32), "A")

    def kernel(A):
        B = hcl.compute(A.shape, lambda *args: A[args] + 1, "B")
        C = hcl.compute(A.shape, lambda *args: B[args] + 1, "C")
        D = hcl.compute(A.shape, lambda *args: C[args] * 2, "D")
        return D

    target = hcl.platform.vlab
    s = hcl.create_schedule([A], kernel)
    s.to(kernel.B, target.xcel)
    s.to(kernel.C, target.host)
    target.config(compile="aocl", mode="sw_sim")
    f = hcl.build(s, target)

    host_in = np.random.randint(10, size=(10, 32))
    host_out = np.zeros((10, 32))
    hcl_A = hcl.asarray(host_in)
    hcl_B = hcl.asarray(host_out, dtype=hcl.Int(32))
    f(hcl_A, hcl_B)

    np.testing.assert_array_equal(hcl_B.asnumpy(), (host_in + 2) * 2)
def main():
    """Generate SODA code for ``jacobi`` and time the default build.

    Prints the ``soda`` and ``soda_xhls`` build outputs, writes the
    ``soda_xhls`` output to ``kernel.cpp``, then builds the same algorithm
    with the default target, runs it on a random 480x640 image and prints the
    elapsed time.
    """
    dtype = hcl.Float()
    shape = (480, 640)
    input_image = hcl.placeholder(shape, name="input", dtype=dtype)
    output_image = hcl.placeholder(shape, name="output", dtype=dtype)
    tensors = [input_image, output_image]

    # SODA back ends: stencil the output stage, then emit code.
    soda_schedule = hcl.create_schedule(tensors, jacobi)
    soda_schedule[jacobi.output].stencil(unroll_factor=8)
    print(hcl.build(soda_schedule, target='soda'))
    print(hcl.build(soda_schedule, target='soda_xhls'))
    with open("kernel.cpp", "w") as fp:
        fp.write(hcl.build(soda_schedule, target='soda_xhls'))

    # Default build: fresh schedule without the stencil directive.
    llvm_schedule = hcl.create_schedule(tensors, jacobi)
    program = hcl.build(llvm_schedule)

    data_in = hcl.asarray(
        np.random.random(input_image.shape), dtype=hcl.Float())
    data_out = hcl.asarray(np.zeros(output_image.shape), dtype=hcl.Float())

    start = time.perf_counter()
    program(data_in, data_out)
    latency = time.perf_counter() - start
    print(f"CPU execution time {latency}")
def test_duplicated():
    """Build and run a kernel where the same stream directive is issued twice.

    Returns early when ``vivado_hls`` is not found on the PATH.  The function
    only executes the build; it makes no assertion on the output.
    """
    tool_missing = os.system("which vivado_hls >> /dev/null") != 0
    if tool_missing:
        return

    A = hcl.placeholder((10, ), "A")

    def kernel(A):
        B = hcl.compute(A.shape, lambda i: A[i] + 1, "B")
        C = hcl.compute(B.shape, lambda i: B[i] + 1, "C")
        return C

    # NOTE(review): other tests in this file spell these as
    # hcl.platform / config(compile=...); confirm which API version applies.
    target = hcl.Platform.zc706
    target.config(compiler="vivado_hls", mode="csyn")

    s = hcl.create_schedule([A], kernel)
    s.to([A], target.xcel)
    s.to(kernel.C, target.host)
    s.to(kernel.B, s[kernel.C])
    # ignored duplicated streaming
    s.to(kernel.B, s[kernel.C])
    f = hcl.build(s, target)

    host_in = np.zeros((10, ))
    host_out = np.zeros((10, ))
    hcl_A = hcl.asarray(host_in)
    hcl_C = hcl.asarray(host_out)
    f(hcl_A, hcl_C)
def squeeze_test(in_shape, axis=None):
    """Run ``hlib.op.nn.squeeze`` on random data next to ``np.squeeze``.

    Args:
        in_shape: shape of the random integer input.
        axis: ``None`` to drop every size-1 dimension, otherwise a container
            of dimension indices to drop (membership is tested with ``in``).

    Returns:
        Tuple ``(input, hcl_output, numpy_output)``, the first two as NumPy
        arrays read back from the HeteroCL buffers.
    """
    hcl.init()
    input1 = hcl.placeholder(in_shape)

    def func(input1, axis=axis):
        return hlib.op.nn.squeeze(input1, axis)

    s = hcl.create_schedule([input1], func)
    f = hcl.build(s)

    _in = np.random.randint(50, size=in_shape)
    real_out = np.squeeze(_in, axis)

    def _new_shape(in_shape, axis):
        # Output shape: drop size-1 dims, or exactly the listed dims.
        if axis is None:
            return [extent for extent in in_shape if extent != 1]
        return [
            extent for pos, extent in enumerate(in_shape) if pos not in axis
        ]

    _out = hcl.asarray(np.zeros(_new_shape(in_shape, axis)))
    _in = hcl.asarray(_in)
    f(_in, _out)
    return _in.asnumpy(), _out.asnumpy(), real_out
def test_fcompute_multiple_return_multi_dim():
    """Check a 3-D ``hcl.compute`` body with a return in each branch.

    The body returns the first index ``x`` where ``A[x, y, z] > 5`` and ``0``
    otherwise; every output element is compared with that rule.
    """

    def kernel(A):

        def foo(x, y, z):
            with hcl.if_(A[x, y, z] > 5):
                hcl.return_(x)
            with hcl.else_():
                hcl.return_(0)

        return hcl.compute(A.shape, foo)

    A = hcl.placeholder((10, 10, 10))
    s = hcl.create_schedule(A, kernel)
    f = hcl.build(s)

    np_A = numpy.random.randint(10, size=(10, 10, 10))
    np_B = numpy.zeros((10, 10, 10))
    hcl_A = hcl.asarray(np_A)
    hcl_B = hcl.asarray(np_B, dtype=hcl.Int(32))
    f(hcl_A, hcl_B)

    ret_B = hcl_B.asnumpy()
    for i, j, k in numpy.ndindex(10, 10, 10):
        expected = i if np_A[i][j][k] > 5 else 0
        assert ret_B[i][j][k] == expected
def expand_dim_test(in_shape, axis, new_axis):
    """Run ``hlib.op.nn.expand_dims`` on random data next to NumPy.

    Args:
        in_shape: shape of the random integer input.
        axis: position at which the new dimensions are inserted.
        new_axis: how many size-1 dimensions to insert.

    Returns:
        Tuple ``(input, hcl_output, numpy_output)``, the first two as NumPy
        arrays read back from the HeteroCL buffers.
    """
    hcl.init()
    input1 = hcl.placeholder(in_shape)

    def func(input1, axis=axis, new_axis=new_axis):
        return hlib.op.nn.expand_dims(input1, axis, new_axis)

    s = hcl.create_schedule([input1], func)
    f = hcl.build(s)

    _in = np.random.randint(50, size=in_shape)
    real_out = _in
    for _ in range(new_axis):
        real_out = np.expand_dims(real_out, axis)

    def _new_shape(in_shape, axis, new_axis):
        # Leading dims, then the inserted ones, then the remaining dims.
        head = [in_shape[i] for i in range(axis)]
        inserted = [1 for _ in range(new_axis)]
        tail = [in_shape[i + axis] for i in range(len(in_shape) - axis)]
        return head + inserted + tail

    _out = hcl.asarray(np.zeros(_new_shape(in_shape, axis, new_axis)))
    _in = hcl.asarray(_in)
    f(_in, _out)
    return _in.asnumpy(), _out.asnumpy(), real_out
def test_pack_multi_dimension():
    """Check ``hcl.pack`` along ``axis=1`` of a 2-D tensor for several widths.

    For every packed width ``i`` in 4, 8, ..., 32 a (10, 40) input of width
    ``i // 4`` is packed into a (10, 10) output of width ``i``; element
    ``lane`` of each group of four is expected at bit offset
    ``lane * (i // 4)``.
    """

    def pack(A):
        return hcl.pack(A, axis=1, factor=4)

    for i in range(4, 36, 4):
        lane_bits = i // 4
        A = hcl.placeholder((10, 40), "A", dtype=hcl.UInt(lane_bits))
        s = hcl.create_schedule([A], pack)
        f = hcl.build(s)

        _A = hcl.asarray(
            np.random.randint(1000, size=(10, 40)), dtype=hcl.UInt(lane_bits))
        _B = hcl.asarray(np.zeros((10, 10)), dtype=hcl.UInt(i))
        f(_A, _B)

        narrow = _A.asnumpy()
        packed = _B.asnumpy()
        for row in range(10):
            for col in range(10):
                # i is a multiple of 4, so lane * i // 4 == lane * lane_bits.
                golden = sum(
                    narrow[row, col * 4 + lane] << (lane * lane_bits)
                    for lane in range(4))
                assert packed[row, col] == golden
def split_test(in_shape, i_or_s, axis=0):
    """Run ``hlib.op.nn.split`` on random data next to ``np.split``.

    The number and shapes of the output buffers are taken from what
    ``np.split`` returns.  Branching on the type of ``i_or_s`` instead would
    leave the count undefined for any split spec that is neither ``list`` nor
    ``int`` (tuple, ndarray, NumPy integer).

    Args:
        in_shape: shape of the random integer input.
        i_or_s: split indices or number of sections, as accepted by
            ``np.split``.
        axis: axis along which to split.

    Returns:
        Tuple ``(input, hcl_outputs, numpy_outputs)`` where ``input`` is a
        NumPy array read back from the HeteroCL buffer and both outputs are
        lists of NumPy arrays.
    """
    hcl.init()
    input1 = hcl.placeholder(in_shape)

    def func(input1, i_or_s=i_or_s, axis=axis):
        return hlib.op.nn.split(input1, i_or_s, axis)

    s = hcl.create_schedule([input1], func)
    f = hcl.build(s)

    _in = np.random.randint(50, size=in_shape)
    real_out = np.split(_in, i_or_s, axis)

    # One zero-initialised output buffer per reference piece.
    _out = [hcl.asarray(np.zeros(piece.shape)) for piece in real_out]
    _in = hcl.asarray(_in)
    f(_in, *_out)

    _out = [buf.asnumpy() for buf in _out]
    return _in.asnumpy(), _out, real_out
def test_unpack():
    """Check ``hcl.unpack`` with ``factor=4`` on a 1-D tensor.

    For every source width ``i`` in 4, 8, ..., 32, each of the 10 inputs is
    unpacked into 4 fields of ``i // 4`` bits; field ``k`` is expected to be
    bits ``[k * (i // 4), (k + 1) * (i // 4))`` of the source value.
    """

    def unpack(A):
        return hcl.unpack(A, factor=4, name="B")

    for i in range(4, 36, 4):
        field_bits = i // 4
        A = hcl.placeholder((10, ), "A", dtype=hcl.UInt(i))
        s = hcl.create_schedule([A], unpack)
        f = hcl.build(s)

        _A = hcl.asarray(
            np.random.randint(1000, size=(10, )), dtype=hcl.UInt(i))
        _B = hcl.asarray(np.zeros(40), dtype=hcl.UInt(field_bits))
        f(_A, _B)

        wide = _A.asnumpy()
        fields = _B.asnumpy()
        for j in range(10):
            for k in range(4):
                source = wide[j]
                got = fields[j * 4 + k]
                golden = (source >> (field_bits * k)) % (1 << field_bits)
                assert got == golden
def test_module_declarative():
    """Check an ``hcl.def_`` module whose body is a single ``hcl.update``.

    The module writes ``a + b`` into ``c``; the result is compared with the
    NumPy sum of the two random inputs.
    """
    hcl.init()

    def algorithm(a, b, c):

        @hcl.def_([a.shape, b.shape, c.shape])
        def add(a, b, c):
            hcl.update(c, lambda *x: a[x] + b[x])

        add(a, b, c)

    a = hcl.placeholder((10, ))
    b = hcl.placeholder((10, ))
    c = hcl.placeholder((10, ))
    s = hcl.create_schedule([a, b, c], algorithm)
    f = hcl.build(s)

    # Host-side data; kept under separate names from the placeholders.
    lhs = np.random.randint(100, size=(10, ))
    rhs = np.random.randint(100, size=(10, ))
    out = np.zeros(10)

    _a = hcl.asarray(lhs)
    _b = hcl.asarray(rhs)
    _c = hcl.asarray(out)
    f(_a, _b, _c)

    assert np.array_equal(_c.asnumpy(), lhs + rhs)
def run():
    """Classify 180 test digits with the offloaded KNN kernel and report.

    For each test image the training set and a zeroed (10, 3) candidate
    matrix are wrapped as HeteroCL arrays, ``offload`` is timed, and
    ``knn_vote`` on the resulting matrix is compared with the label.  Prints
    the average kernel time and the accuracy, then asserts at least 150
    correct predictions.
    """
    # Load the data set; the training labels are not needed here.
    train_images, _, test_images, test_labels = read_digitrec_data()

    num_tests = 180
    correct = 0.0
    total_time = 0

    for idx in range(num_tests):
        # Inputs are cast to the data types the offloaded function expects.
        hcl_train_images = hcl.asarray(train_images, dtype_image)
        hcl_knn_mat = hcl.asarray(np.zeros((10, 3)), dtype_knnmat)

        # Time only the offloaded call.
        start = time.time()
        offload(test_images[idx], hcl_train_images, hcl_knn_mat)
        total_time += time.time() - start

        # Vote on the candidates read back from the device buffer.
        knn_mat = hcl_knn_mat.asnumpy()
        if knn_vote(knn_mat) == test_labels[idx]:
            correct += 1

    print("Average kernel time (s): {:.2f}".format(total_time / 180))
    print("Accuracy (%): {:.2f}".format(100 * correct / 180))

    # for testing
    assert correct >= 150.0
def test_module_no_return():
    """Check an ``hcl.def_`` module that writes its result in place.

    ``update_B`` stores ``A[x] + 1`` into ``B[x]`` and is called once per
    index from an imperative loop inside a stage; every ``B[i]`` must equal
    ``a[i] + 1``.
    """

    def algorithm(A, B):

        @hcl.def_([A.shape, B.shape, ()])
        def update_B(A, B, x):
            B[x] = A[x] + 1

        with hcl.Stage():
            with hcl.for_(0, 10) as i:
                update_B(A, B, i)

    A = hcl.placeholder((10, ))
    B = hcl.placeholder((10, ))
    s = hcl.create_schedule([A, B], algorithm)
    f = hcl.build(s)

    a = np.random.randint(100, size=(10, ))
    b = np.zeros(10)
    _A = hcl.asarray(a)
    _B = hcl.asarray(b, hcl.Int())
    f(_A, _B)

    _A = _A.asnumpy()
    _B = _B.asnumpy()
    for idx in range(10):
        expected = a[idx] + 1
        assert _B[idx] == expected
def _test_llvm(length):
    """Compare ``hlib.ip.single_fft_hls`` with ``np.fft.fft`` on random input.

    Real and imaginary parts are checked separately with
    ``rtol=1e-02, atol=1e-3``.

    Args:
        length: number of complex samples.
    """
    hcl.init(hcl.Float())
    X_real = hcl.placeholder((length, ), name="X_real")
    X_imag = hcl.placeholder((length, ), name="X_imag")

    def math_func(A, B):
        return hlib.ip.single_fft_hls(A, B)

    s = hcl.create_schedule([X_real, X_imag], math_func)
    f = hcl.build(s)

    re_in = np.random.random((length))
    im_in = np.random.random((length))
    spectrum = np.fft.fft(re_in + 1j * im_in)

    re_out = hcl.asarray(np.zeros((length)))
    im_out = hcl.asarray(np.zeros((length)))
    f(hcl.asarray(re_in), hcl.asarray(im_in), re_out, im_out)

    np.testing.assert_allclose(
        spectrum.real, re_out.asnumpy(), rtol=1e-02, atol=1e-3)
    np.testing.assert_allclose(
        spectrum.imag, im_out.asnumpy(), rtol=1e-02, atol=1e-3)
def test_module_declarative_compute_at():
    """Check ``compute_at`` between two stages inside an ``hcl.def_`` module.

    Stage ``d`` computes ``a + b`` and stage ``u`` writes ``d + 1`` into
    ``c``; ``d`` is attached to ``u`` at ``u``'s first axis.  The result must
    equal ``a + b + 1``.
    """
    hcl.init()

    def algorithm(a, b, c):

        @hcl.def_([a.shape, b.shape, c.shape])
        def add(a, b, c):
            d = hcl.compute(a.shape, lambda *x: a[x] + b[x], "d")
            hcl.update(c, lambda *x: d[x] + 1, "u")

        add(a, b, c)

    a = hcl.placeholder((10, ))
    b = hcl.placeholder((10, ))
    c = hcl.placeholder((10, ))
    s = hcl.create_schedule([a, b, c], algorithm)

    # Stages are looked up through the module attached to ``algorithm``.
    add = algorithm.add
    s[add.d].compute_at(s[add.u], add.u.axis[0])
    f = hcl.build(s)

    lhs = np.random.randint(100, size=(10, ))
    rhs = np.random.randint(100, size=(10, ))
    out = np.zeros(10)

    _a = hcl.asarray(lhs)
    _b = hcl.asarray(rhs)
    _c = hcl.asarray(out)
    f(_a, _b, _c)

    assert np.array_equal(_c.asnumpy(), lhs + rhs + 1)
def test_module_quantize_ret_dtype():
    """Check downsizing a module and its consumer stage to ``UInt(2)``.

    A scheme downsizes ``algorithm.add`` and ``algorithm.C`` before the
    schedule is created; every output must equal ``(a[i] + b[i]) % 4``.
    """
    hcl.init()

    def algorithm(A, B):

        @hcl.def_([A.shape, B.shape, ()])
        def add(A, B, x):
            hcl.return_(A[x] + B[x])

        return hcl.compute(A.shape, lambda x: add(A, B, x), "C")

    A = hcl.placeholder((10, ))
    B = hcl.placeholder((10, ))

    scheme = hcl.create_scheme([A, B], algorithm)
    scheme.downsize([algorithm.add, algorithm.C], hcl.UInt(2))
    s = hcl.create_schedule_from_scheme(scheme)
    f = hcl.build(s)

    a = np.random.randint(100, size=(10, ))
    b = np.random.randint(100, size=(10, ))
    c = np.zeros(10)

    _A = hcl.asarray(a)
    _B = hcl.asarray(b)
    _C = hcl.asarray(c, hcl.UInt(2))
    f(_A, _B, _C)

    _A = _A.asnumpy()
    _B = _B.asnumpy()
    _C = _C.asnumpy()

    for idx in range(10):
        expected = (a[idx] + b[idx]) % 4
        assert _C[idx] == expected
def test_module_mixed_paradigm():
    """Check a module that mixes an imperative loop with declarative stages.

    The module first zeroes ``a`` in a loop, then computes ``a + b`` and
    writes that plus one into ``c``; the result must equal ``b + 1``.
    """
    hcl.init()

    def algorithm(a, b, c):

        @hcl.def_([a.shape, b.shape, c.shape])
        def add(a, b, c):
            with hcl.for_(0, 10) as i:
                a[i] = 0
            d = hcl.compute(a.shape, lambda *x: a[x] + b[x])
            hcl.update(c, lambda *x: d[x] + 1)

        add(a, b, c)

    a = hcl.placeholder((10, ))
    b = hcl.placeholder((10, ))
    c = hcl.placeholder((10, ))
    s = hcl.create_schedule([a, b, c], algorithm)
    f = hcl.build(s)

    lhs = np.random.randint(100, size=(10, ))
    rhs = np.random.randint(100, size=(10, ))
    out = np.zeros(10)

    _a = hcl.asarray(lhs)
    _b = hcl.asarray(rhs)
    _c = hcl.asarray(out)
    f(_a, _b, _c)

    assert np.array_equal(_c.asnumpy(), rhs + 1)
def test_module_with_return():
    """Check an ``hcl.def_`` module that returns a value used by ``update``.

    ``update_B`` returns ``A[x] + 1`` and ``hcl.update`` stores it into
    ``B``; every ``B[i]`` must equal ``a[i] + 1``.
    """

    def algorithm(A, B):

        @hcl.def_([A.shape, ()])
        def update_B(A, x):
            hcl.return_(A[x] + 1)

        hcl.update(B, lambda x: update_B(A, x))

    A = hcl.placeholder((10, ))
    B = hcl.placeholder((10, ))
    s = hcl.create_schedule([A, B], algorithm)
    f = hcl.build(s)

    a = np.random.randint(100, size=(10, ))
    b = np.zeros(10)
    _A = hcl.asarray(a)
    _B = hcl.asarray(b, hcl.Int())
    f(_A, _B)

    _A = _A.asnumpy()
    _B = _B.asnumpy()
    for idx in range(10):
        expected = a[idx] + 1
        assert _B[idx] == expected
def test_reuse_blur_x_y():
    """Check a diagonal 3-tap sum with chained ``reuse_at`` on both axes.

    ``B[y, x] = A[y, x] + A[y + 1, x + 1] + A[y + 2, x + 2]``.  ``reuse_at``
    is applied to ``A`` along ``B.axis[0]`` and then to the returned buffer
    along ``B.axis[1]``.  The result on random integers must match the same
    sum computed with NumPy.
    """
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(
        (8, 8),
        lambda y, x: A[y, x] + A[y + 1, x + 1] + A[y + 2, x + 2],
        "B")
    s = hcl.create_schedule([A, B])
    RB_y = s.reuse_at(A, s[B], B.axis[0], "RB_y")
    RB_x = s.reuse_at(RB_y, s[B], B.axis[1], "RB_x")
    f = hcl.build(s)

    image = np.random.randint(0, 10, size=(10, 10))
    result = np.zeros((8, 8), dtype="int")
    golden = np.zeros((8, 8), dtype="int")
    # Three diagonally shifted 8x8 views added together.
    golden[:, :] = image[0:8, 0:8] + image[1:9, 1:9] + image[2:10, 2:10]

    hcl_A = hcl.asarray(image)
    hcl_B = hcl.asarray(result)
    f(hcl_A, hcl_B)

    assert np.array_equal(hcl_B.asnumpy(), golden)