def test_permuted():
    """Compare ``mapreduce_test_4`` with a NumPy matrix product.

    The symbols are fixed to M=50, N=20 and K=5. ``A`` (M x N) and ``B``
    (N x K) receive random values, ``C`` (M x K) and ``D`` (M x K x N) are
    zeroed, and the program is run on all four. The test passes when the norm
    of the difference to ``np.dot``, divided by M*K, does not exceed 1e-5.
    """
    M.set(50)
    N.set(20)
    K.set(5)
    rows, inner, cols = M.get(), N.get(), K.get()

    print('Matrix multiplication %dx%dx%d' % (rows, inner, cols))

    # Inputs: random operands
    A = dace.ndarray([M, N], dtype=dace.float64)
    B = dace.ndarray([N, K], dtype=dace.float64)
    # Outputs / scratch: start from zero
    C = dace.ndarray([M, K], dtype=dace.float64)
    D = dace.ndarray([M, K, N], dtype=dace.float64)

    A[:] = np.random.rand(rows, inner).astype(dace.float64.type)
    B[:] = np.random.rand(inner, cols).astype(dace.float64.type)
    C[:] = dace.float64(0)
    D[:] = dace.float64(0)

    # Plain NumPy snapshots taken before the program runs
    A_regression = np.empty([rows, inner], dtype=np.float64)
    B_regression = np.empty([inner, cols], dtype=np.float64)
    C_regression = np.empty([rows, cols], dtype=np.float64)
    A_regression[...] = A
    B_regression[...] = B
    C_regression[...] = C

    mapreduce_test_4(A, B, C, D)

    # Reference product, written straight into the snapshot of C
    np.dot(A_regression, B_regression, out=C_regression)

    error_norm = np.linalg.norm(C_regression - C)
    diff = error_norm / (rows * cols)
    print("Difference:", diff)
    assert diff <= 1e-5
def test_axpy():
    """Run the vectorized ``axpy`` SDFG and compare it with BLAS ``axpy``.

    N is set to 24. A random scalar ``A`` and random vectors ``X`` and ``Y``
    are generated, the SDFG obtained from ``common.vectorize(axpy)`` is called
    on them, and the BLAS routine is called on independent copies. The test
    passes when the norm of the difference between the two ``Y`` vectors,
    divided by N, does not exceed 1e-5.
    """
    print("==== Program start ====")

    N.set(24)

    print('Scalar-vector multiplication %d' % (N.get()))

    # Initialize data: random scalar A, random vectors X and Y
    A = dace.float64(np.random.rand())
    X = np.random.rand(N.get()).astype(np.float64)
    Y = np.random.rand(N.get()).astype(np.float64)

    # Independent copies for the reference computation, taken before the SDFG
    # gets a chance to modify Y
    A_regression = A
    X_regression = X.copy()
    Y_regression = Y.copy()

    sdfg = common.vectorize(axpy)

    sdfg(A=A, X=X, Y=Y, N=N)

    c_axpy = sp.linalg.blas.get_blas_funcs('axpy', arrays=(X_regression, Y_regression))
    # NOTE(review): the reference result is read back from Y_regression below,
    # so the BLAS call is expected to update that array in place - confirm.
    if dace.Config.get_bool('profiling'):
        dace.timethis('axpy', 'BLAS', (2 * N.get()), c_axpy, X_regression, Y_regression, N.get(), A_regression)
    else:
        c_axpy(X_regression, Y_regression, N.get(), A_regression)

    diff = np.linalg.norm(Y_regression - Y) / N.get()
    print("Difference:", diff)
    print("==== Program end ====")
    assert diff <= 1e-5
def test_axpy_transformed():
    """Run ``axpy`` after applying ``FPGATransformSDFG`` and verify the result.

    A random scalar and two random vectors of length 24 are generated and the
    expected value ``A * X + Y`` is computed up front. The SDFG built from
    ``axpy`` is transformed, renamed, specialized for N and executed. The test
    passes when the norm of the difference between the expected vector and
    ``Y``, divided by the length, does not exceed 1e-5.

    Returns:
        The transformed and specialized SDFG.
    """
    n = 24
    print(f'Scalar-vector multiplication {n}')

    alpha = dace.float64(np.random.rand())
    x_vec = np.random.rand(n)
    y_vec = np.random.rand(n)

    # Reference value, computed before the SDFG is run on y_vec
    reference = alpha * x_vec + y_vec

    # Build the SDFG from the program and apply the FPGA transformation
    sdfg = axpy.to_sdfg()
    sdfg.apply_transformations(FPGATransformSDFG)

    # Give the SDFG a size-specific name, fix N, then run it
    sdfg._name = f'axpy_fpga_{n}'
    sdfg.specialize({'N': n})
    sdfg(A=alpha, X=x_vec, Y=y_vec)

    error_norm = np.linalg.norm(reference - y_vec)
    diff = error_norm / n
    assert diff <= 1e-5

    return sdfg
def test_dot():
    """Check ``dot`` of a random float32 vector with itself against NumPy.

    N is set to 64. The result container is a float64 DaCe scalar initialised
    to zero. The test passes when the norm of the difference to ``np.dot``,
    divided by the vector length, does not exceed 1e-5.
    """
    n = 64
    N.set(n)

    vec = dace.ndarray([N], dtype=dace.float32)
    result = dace.scalar(dace.float64)

    vec[:] = np.random.rand(n).astype(dace.float32.type)
    result[0] = dace.float64(0)

    dot(vec, vec, result, N=n)

    reference = np.dot(vec, vec)
    diff_aa = np.linalg.norm(reference - result) / float(n)
    print("Difference:", diff_aa)
    assert diff_aa <= 1e-5
def _test(sdfg):
    """Run ``sdfg`` on a random vector and check that it doubles the input.

    N is set to 144. The input is filled with random float64 values and the
    output is zeroed before the call. The check passes when the norm of
    ``2 * input - output``, divided by N, does not exceed 1e-5.

    Args:
        sdfg: Callable invoked with the keyword arguments ``A``, ``Vout``
            and ``N``.
    """
    N.set(144)
    size = N.get()

    print('Vector double CUDA (shared memory) %d' % (size))

    src = dace.ndarray([N], dace.float64)
    dst = dace.ndarray([N], dace.float64)
    src[:] = np.random.rand(size).astype(dace.float64.type)
    dst[:] = dace.float64(0)

    sdfg(A=src, Vout=dst, N=N)

    error_norm = np.linalg.norm(2 * src - dst)
    diff = error_norm / size
    print("Difference:", diff)
    assert diff <= 1e-5
def _test(sdfg):
    """Run ``sdfg`` on a random H x W array and check that it doubles the input.

    W is set to 128 and H to 64. The input is filled with random float64
    values and the output is zeroed before the call. The check passes when the
    norm of ``2 * input - output``, divided by H*W, does not exceed 1e-5.

    Args:
        sdfg: Callable invoked with the keyword arguments ``V``, ``Vout``,
            ``H`` and ``W``.
    """
    W.set(128)
    H.set(64)
    width, height = W.get(), H.get()

    print('Vector double CUDA (shared memory 2D) %dx%d' % (width, height))

    src = dace.ndarray([H, W], dace.float64)
    dst = dace.ndarray([H, W], dace.float64)
    src[:] = np.random.rand(height, width).astype(dace.float64.type)
    dst[:] = dace.float64(0)

    sdfg(V=src, Vout=dst, H=H, W=W)

    error_norm = np.linalg.norm(2 * src - dst)
    diff = error_norm / (height * width)
    print("Difference:", diff)
    assert diff <= 1e-5
if __name__ == "__main__":
    # Script entry point: run mapreduce_test_4 on random 50x20 and 20x5
    # operands, compare the result with np.dot and report success through the
    # process exit status (0 when the scaled difference is at most 1e-5,
    # 1 otherwise).
    M.set(50)
    N.set(20)
    K.set(5)

    print('Matrix multiplication %dx%dx%d' % (M.get(), N.get(), K.get()))

    # Initialize arrays: Randomize A and B, zero C and D
    A = dace.ndarray([M, N], dtype=dace.float64)
    B = dace.ndarray([N, K], dtype=dace.float64)
    C = dace.ndarray([M, K], dtype=dace.float64)
    D = dace.ndarray([M, K, N], dtype=dace.float64)
    A[:] = np.random.rand(M.get(), N.get()).astype(dace.float64.type)
    B[:] = np.random.rand(N.get(), K.get()).astype(dace.float64.type)
    C[:] = dace.float64(0)
    D[:] = dace.float64(0)

    # NumPy snapshots of the operands, taken before the program runs
    A_regression = np.ndarray([M.get(), N.get()], dtype=np.float64)
    B_regression = np.ndarray([N.get(), K.get()], dtype=np.float64)
    C_regression = np.ndarray([M.get(), K.get()], dtype=np.float64)
    A_regression[:] = A[:]
    B_regression[:] = B[:]
    C_regression[:] = C[:]

    mapreduce_test_4(A, B, C, D)

    # Reference product; the third positional argument is np.dot's output array
    np.dot(A_regression, B_regression, C_regression)

    diff = np.linalg.norm(C_regression - C) / (M.get() * K.get())
    print("Difference:", diff)

    # Raise SystemExit directly instead of calling exit(): that helper is
    # installed by the site module for interactive sessions and is not
    # guaranteed to exist (e.g. under ``python -S``).
    raise SystemExit(0 if diff <= 1e-5 else 1)
# Command line: one optional positional integer N (default 64)
parser = argparse.ArgumentParser()
parser.add_argument("N", type=int, nargs="?", default=64)
args = vars(parser.parse_args())

# Containers are declared before N receives its value; this order is kept
# exactly as it was.
A = dace.ndarray([N], dtype=dace.float32)
B = dace.ndarray([N], dtype=dace.float32)
out_AB = dace.scalar(dace.float64)
out_AA = dace.scalar(dace.float64)

N.set(args["N"])

print('Dot product %d' % (N.get()))

# Random inputs (A first, then B), zeroed result scalars
for vec in (A, B):
    vec[:] = np.random.rand(N.get()).astype(dace.float32.type)
for acc in (out_AB, out_AA):
    acc[0] = dace.float64(0)

# A . B
cdot = dace.compile(dot, A, B, out_AB)
cdot(A, B, out_AB)

# To allow reloading the SDFG code file with the same name
del cdot

# A . A
cdot_self = dace.compile(dot, A, A, out_AA)
cdot_self(A, A, out_AA)

# Differences to the NumPy results, scaled by the vector length
n_elems = float(N.get())
diff_ab = np.linalg.norm(np.dot(A, B) - out_AB) / n_elems
diff_aa = np.linalg.norm(np.dot(A, A) - out_AA) / n_elems
print("Difference (A*B):", diff_ab)
print("Difference (A*A):", diff_aa)
print("==== Program start ====") parser = argparse.ArgumentParser() parser.add_argument("M", type=int, nargs="?", default=128) parser.add_argument("N", type=int, nargs="?", default=128) args = vars(parser.parse_args()) M.set(args["M"]) N.set(args["N"]) print('Matrix point-wise op %dx%d' % (M.get(), N.get())) # Initialize arrays: Randomize A and B, zero C A[1, 2, 3] = np.random.rand(M.get(), N.get()).astype(dace.float64.type) B[1, 3, 2, 1] = np.random.rand(M.get(), N.get()).astype(dace.float64.type) C[2, 2, 0] = dace.float64(0) A_regression = np.ndarray([M.get(), N.get()], dtype=np.float64) B_regression = np.ndarray([M.get(), N.get()], dtype=np.float64) A_regression[:] = A[1, 2, 3] B_regression[:] = B[1, 3, 2, 1] C_regression = C[2, 2, 0] mpwop = SDFG(name='mpwop') state = mpwop.add_state(label='mpwop') A_node = state.add_array('A', A.shape, dace.float64) B_node = state.add_array('B', B.shape, dace.float64) C_node = state.add_array('C', C.shape, dace.float64) np_frontend.op_impl.matrix_pointwise_op(state, A_node, A_node,
# Transient variable @dace.map(_[0:N:32]) def multiplication(i): @dace.map(_[0:32]) def mult_block(bi): in_V << V[i + bi] out >> Vout[i + bi] out = in_V * 2 @dace.map(_[0:32]) def mult_block_2(bi): in_V << V[i + bi] out >> Vout[i + bi] out = in_V * 2 if __name__ == "__main__": N.set(128) print('Vector double CUDA (block) %d' % (N.get())) V[:] = np.random.rand(N.get()).astype(dace.float64.type) Vout[:] = dace.float64(0) cudahello(V, Vout) diff = np.linalg.norm(2 * V - Vout) / N.get() print("Difference:", diff) print("==== Program end ====") exit(0 if diff <= 1e-5 else 1)
out = in_A * in_X + in_Y if __name__ == "__main__": print("==== Program start ====") parser = argparse.ArgumentParser() parser.add_argument("N", type=int, nargs="?", default=24) args = vars(parser.parse_args()) N.set(args["N"]) print('Scalar-vector multiplication %d' % (N.get())) # Initialize arrays: Randomize A and X, zero Y A = dace.float64(np.random.rand()) X = np.random.rand(N.get()).astype(np.float64) Y = np.random.rand(N.get()).astype(np.float64) A_regression = np.float64() X_regression = np.ndarray([N.get()], dtype=np.float64) Y_regression = np.ndarray([N.get()], dtype=np.float64) A_regression = A X_regression[:] = X[:] Y_regression[:] = Y[:] axpy(A, X, Y) c_axpy = sp.linalg.blas.get_blas_funcs('axpy', arrays=(X_regression, Y_regression)) if dace.Config.get_bool('profiling'):
# Return the constant 0 converted with ``dace.float64``.
# NOTE(review): presumably compiled as a DaCe program by a decorator that is
# not visible here - confirm before restructuring the body.
def simple_constant_conversion():
    return dace.float64(0)
parser = argparse.ArgumentParser() parser.add_argument("M", type=int, nargs="?", default=128) parser.add_argument("N", type=int, nargs="?", default=128) parser.add_argument("K", type=int, nargs="?", default=128) args = vars(parser.parse_args()) M.set(args["M"]) N.set(args["N"]) K.set(args["K"]) print('Matrix multiplication %dx%dx%d' % (M.get(), N.get(), K.get())) # Initialize arrays: Randomize A and B, zero C A[1, 2, 3] = np.random.rand(M.get(), N.get()).astype(dace.float64.type) B[1, 3, 2, 1] = np.random.rand(N.get(), K.get()).astype(dace.float64.type) C[2, 2] = dace.float64(0) A_regression = np.ndarray([M.get(), N.get()], dtype=np.float64) B_regression = np.ndarray([N.get(), K.get()], dtype=np.float64) C_regression = np.ndarray([M.get(), K.get()], dtype=np.float64) A_regression[:] = A[1, 2, 3] B_regression[:] = B[1, 3, 2, 1] C_regression[:] = C[2, 2] mmul = SDFG(name='mmul') state = mmul.add_state(label='mmul') A_node = state.add_array('A', A.shape, dace.float64) B_node = state.add_array('B', B.shape, dace.float64) C_node = state.add_array('C', C.shape, dace.float64) np_frontend.op_impl.matrix_multiplication(state, A_node,