# Allocates a one-element array with the same dtype as `input` and returns
# that array plus one. `input` itself is only consulted for its dtype.
# NOTE(review): presumably compiled through @dace.program (decorator not
# visible here); `N` is expected to be defined in the enclosing module.
def lifetimetest(input: dace.float64[N]):
    # NOTE(review): `tmp` is read below without ever being written --
    # presumably only its allocation matters to the caller; confirm.
    tmp = dace.ndarray([1], input.dtype)
    return tmp + 1
#!/usr/bin/env python from __future__ import print_function import dace import numpy as np H = dace.symbol('H') W = dace.symbol('W') V = dace.ndarray([H, W], dace.float64) Vout = dace.ndarray([H, W], dace.float64) @dace.program(dace.float64[H, W], dace.float64[H, W]) def cudahello(V, Vout): @dace.map(_[0:H:8, 0:W:32]) def multiplication(i, j): @dace.map(_[0:8, 0:32]) def mult_block(bi, bj): in_V << V[i + bi, j + bj] out >> Vout[i + bi, j + bj] out = in_V * 2.0 if __name__ == "__main__": W.set(128) H.set(64) print('Vector double CUDA (shared memory 2D) %dx%d' % (W.get(), H.get())) V[:] = np.random.rand(H.get(), W.get()).astype(dace.float64.type)
#!/usr/bin/env python import numpy as np import dace as dp from dace.sdfg import SDFG, InvalidSDFGError from dace.memlet import Memlet from dace.data import Scalar if __name__ == '__main__': print('SDFG memlet lifetime validation test') # Externals (parameters, symbols) N = dp.symbol('N') N.set(20) input = dp.ndarray([N], dp.int32) output = dp.ndarray([N], dp.int32) input[:] = dp.int32(5) output[:] = dp.int32(0) # Construct SDFG 1 # sdfg1 = SDFG('shouldntwork1') # state = sdfg1.add_state() # B = state.add_array('B', [N], dp.int32) # T = state.add_transient('T', [1], dp.int32) # # tasklet_gen = state.add_tasklet('mytasklet', {}, {'b'}, 'b = 5') # map_entry, map_exit = state.add_map('mymap', dict(k='0:N')) # # map_entry.add_in_connector('IN_1') # map_entry.add_out_connector('OUT_1') # map_exit.add_in_connector('IN_1') # map_exit.add_out_connector('OUT_1')
import math import dace import polybench N = dace.symbol('N') #datatypes = [dace.float64, dace.int32, dace.float32] datatype = dace.float64 # Dataset sizes sizes = [{N: 30}, {N: 90}, {N: 250}, {N: 1300}, {N: 2800}] args = [ dace.ndarray([N, N], datatype), dace.ndarray([N, N], datatype), dace.ndarray([N], datatype), dace.ndarray([N], datatype), dace.ndarray([N], datatype), dace.ndarray([1], datatype), dace.ndarray([1], datatype) ] outputs = [(4, 'y')] def init_array(A, B, tmp, x, y, alpha, beta): n = N.get() alpha[0] = datatype(1.5) beta[0] = datatype(1.2)
#!/usr/bin/env python import dace import math as mt import numpy as np @dace.program def myprint(input, N, M): @dace.tasklet def myprint(): a << input for i in range(0, N): for j in range(0, M): printf("%f\n", mt.sin(a[i, j])) input = dace.ndarray([10, 10], dtype=dace.float32) input[:] = np.random.rand(10, 10).astype(dace.float32.type) myprint(input, 10, 10)
args = vars(parser.parse_args()) E.set(args['edges']) V.set(args['vertices']) srcnode = args['source'] outfile = args['outfile'] regression = False if args['loadgr'] is not None: from support import readgr V, E, G_row, G_col = readgr.read_grfile(args['loadgr']) elif args['loadmtx'] is not None: M = scipy.io.mmread(args['loadmtx']).tocsr() E.set(M.nnz) V.set(M.shape[0]) G_row = dp.ndarray([V + 1], dtype=vtype) G_col = dp.ndarray([E], dtype=vtype) G_row[:] = M.indptr G_col[:] = M.indices else: # Generate a random graph graph = nx.gnm_random_graph(V.get(), E.get(), seed=args['seed']) E.set(E.get() * 2) # Extract adjacency matrix M = nx.to_scipy_sparse_matrix(graph, dtype=vtype.type).tocsr() assert M.nnz == E.get() G_row = dp.ndarray([V + 1], dtype=vtype) G_col = dp.ndarray([E], dtype=vtype) G_row[:] = M.indptr
"tmp = constant_array[i]\n" "out = tmp + incr") fpga_state.add_memlet_path(map_entry, tasklet, memlet=dace.Memlet()) fpga_state.add_memlet_path(tasklet, map_exit, out, src_conn="out", memlet=dace.Memlet("device_output[i]")) sdfg.add_edge(fpga_state, copy_out_state, dace.sdfg.sdfg.InterstateEdge()) sdfg.fill_scope_connectors() sdfg.validate() return sdfg if __name__ == "__main__": sdfg = make_sdfg() sdfg.add_constant('constant_array', CONSTANT_ARRAY) sdfg.add_constant('constant_value', CONSTANT_VALUE) out = dace.ndarray([CONSTANT_ARRAY.size], dtype=dace.float32) sdfg(N=CONSTANT_ARRAY.size, output=out) ref = CONSTANT_ARRAY + CONSTANT_VALUE diff = np.linalg.norm(ref - out) / CONSTANT_ARRAY.size if diff <= 1e-5: print("==== Program end ====") else: print("==== Program Error! ====")
# Symbolic vector length; the concrete value is set in the main block.
N = dace.symbol('N')


# Writes V * 2.0 into Vout element by element over the range [0, N).
@dace.program(dace.float32[N], dace.float32[N])
def cudahello(V, Vout):
    # Transient variable
    # NOTE(review): no transient is declared below; the comment above looks
    # stale -- confirm before relying on it.
    @dace.map(_[0:N])
    def multiplication(i):
        in_V << V[i]
        out >> Vout[i]
        out = in_V * 2.0


if __name__ == "__main__":
    # N must be set before the arrays below are sized with it.
    N.set(52)

    print('Vector double CUDA %d' % (N.get()))

    V = dace.ndarray([N], dace.float32)
    Vout = dace.ndarray([N], dace.float32)
    # Random input, zeroed output.
    V[:] = np.random.rand(N.get()).astype(dace.float32.type)
    Vout[:] = dace.float32(0)

    cudahello(V, Vout)

    # Norm of the difference from the expected 2 * V, divided by N.
    diff = np.linalg.norm(2 * V - Vout) / N.get()
    print("Difference:", diff)
    print("==== Program end ====")
    # Exit status 0 when the error is within 1e-5, 1 otherwise.
    exit(0 if diff <= 1e-5 else 1)
def multiplication(i): @dace.map(_[0:32]) def mult_block(bi): in_V << V[i + bi] out >> Vout[i + bi] out = in_V * 2 @dace.map(_[0:32]) def mult_block_2(bi): in_V << V[i + bi] out >> Vout[i + bi] out = in_V * 2 if __name__ == "__main__": N.set(128) print('Vector double CUDA (block) %d' % (N.get())) V = dace.ndarray([N], dace.float64) Vout = dace.ndarray([N], dace.float64) V[:] = np.random.rand(N.get()).astype(dace.float64.type) Vout[:] = dace.float64(0) cudahello(V, Vout) diff = np.linalg.norm(2 * V - Vout) / N.get() print("Difference:", diff) print("==== Program end ====") exit(0 if diff <= 1e-5 else 1)
# Adds the counter `i` to A repeatedly while i[0] < N, advancing the counter
# by 2 each iteration.
# NOTE(review): presumably compiled through @dace.program (decorator not
# visible here); `N` comes from an enclosing scope outside this view.
def testprog8(A: dace.float32[20, 20]):
    # Counter declared as a one-element int32 array.
    i = dace.ndarray([1], dtype=dace.int32)
    # NOTE(review): in plain Python this rebinds `i` to an int, and the
    # `i[0]` below would then fail; presumably the DaCe frontend treats it
    # as a store into the array declared above -- confirm.
    i = 0
    while i[0] < N:
        A += i
        i += 2
NK: 210, NL: 220 }, { NI: 800, NJ: 900, NK: 1100, NL: 1200 }, { NI: 1600, NJ: 1800, NK: 2200, NL: 2400 }] args = [ dace.ndarray([NI, NK], datatype), dace.ndarray([NK, NJ], datatype), dace.ndarray([NJ, NL], datatype), dace.ndarray([NI, NL], datatype), dace.ndarray([1], datatype), dace.ndarray([1], datatype) ] def init_array(A, B, C, D, alpha, beta): ni = NI.get() nj = NJ.get() nk = NK.get() nl = NL.get() alpha[0] = datatype(1.5)
t = a * 5 dace.reduce(lambda a, b: a + b, tmp, sum) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("W", type=int, nargs="?", default=128) parser.add_argument("H", type=int, nargs="?", default=128) args = vars(parser.parse_args()) W.set(args["W"]) H.set(args["H"]) print('Map-Reduce Test %dx%d' % (W.get(), H.get())) A = dace.ndarray([H, W], dtype=dace.float32) B = dace.ndarray([H, W], dtype=dace.float32) res = dace.ndarray([1], dtype=dace.float32) A[:] = np.random.rand(H.get(), W.get()).astype(dace.float32.type) B[:] = dace.float32(0) res[:] = dace.float32(0) mapreduce_test_3(A, B, res) diff = np.linalg.norm(5 * A - B) / float(dace.eval(H * W)) diff_res = abs((np.sum(B) - res[0])).view(type=np.ndarray) print("Difference:", diff, diff_res) print("==== Program end ====") exit(0 if diff <= 1e-5 and diff_res <= 1 else 1)
#!/usr/bin/env python from __future__ import print_function import gc import argparse import dace import numpy as np import scipy as sp import os from timeit import default_timer as timer N = dace.symbol('N') A = dace.float64 X = dace.ndarray([N], dtype=dace.float64) Y = dace.ndarray([N], dtype=dace.float64) @dace.program(dace.float64, dace.float64[N], dace.float64[N]) def axpy(A, X, Y): @dace.map(_[0:N]) def multiplication(i): in_A << A in_X << X[i] in_Y << Y[i] out >> Y[i] out = in_A * in_X + in_Y
# Allocates a 20-element float64 array and forwards it, together with `a`,
# to `incall`, returning whatever `incall` returns.
# NOTE(review): `incall` is defined outside this view; presumably another
# DaCe program -- confirm.
def top(a: dace.float64[20]):
    # The buffer is requested with the Persistent allocation lifetime.
    tmp = dace.ndarray([20], dace.float64, lifetime=dace.AllocationLifetime.Persistent)
    return incall(a, tmp)
if args["specialize"]: N.set(args["N"]) sdfg = make_sdfg(True) sdfg.specialize(dict(W=W, N=N)) else: sdfg = make_sdfg(False) sdfg.specialize(dict(W=W)) N.set(args["N"]) sdfg.add_constant("num_stages", dace.int32(num_stages)) ratio = dtype(args["ratio"]) print("Predicate-Based Filter. size={}, ratio={} ({}specialized)".format( N.get(), ratio, "" if args["specialize"] else "not ")) A = dace.ndarray([N], dtype=dtype) B = dace.ndarray([N], dtype=dtype) outsize = dace.scalar(dace.uint32) outsize[0] = 0 A[:] = np.random.rand(N.get()).astype(dtype.type) B[:] = dtype(0) if args["specialize"]: sdfg.specialize(dict(N=N)) sdfg(A=A, B=B, outsize=outsize, ratio=ratio) else: sdfg(A=A, B=B, outsize=outsize, ratio=ratio, N=N) if dace.Config.get_bool('profiling'): dace.timethis('filter', 'numpy', 0, regression, A, ratio)
# Builds a 10-element int32 array, zeroes it, then stores each even index
# into its own slot, and returns the array.
# NOTE(review): presumably compiled through @dace.program (decorator not
# visible here).
def for_loop():
    A = dace.ndarray([10], dtype=dace.int32)
    A[:] = 0
    # Step of 2: only indices 0, 2, 4, 6, 8 are written; odd slots stay 0.
    for i in range(0, 10, 2):
        A[i] = i
    return A
}, { M: 80, N: 100 }, { M: 240, N: 260 }, { M: 1200, N: 1400 }, { M: 2600, N: 3000 }] args = [ dace.ndarray([N, M], datatype), dace.ndarray([M, M], datatype), dace.ndarray([M], datatype), dace.ndarray([M], datatype), M, N ] def init_array(data, corr, mean, stddev, M, N): n = N.get() m = M.get() for i in range(n): for j in range(m): data[i, j] = datatype(i * j) / m + i @dace.program(datatype[N, M], datatype[M, M], datatype[M], datatype[M])
if not args["specialize_all"]: H.set(args["H"]) T.set(args["T"]) else: jacobi.specialize(dict(H=H, T=T)) if T.get() % P.get() != 0: raise ValueError( "Iteration must be divisable by number of processing elements") print("Jacobi Stencil {}x{} ({} steps) with {} PEs{}".format( H.get(), W.get(), T.get(), P.get(), (" (fully specialized)" if args["specialize_all"] else ""))) A = dace.ndarray([H, W], dtype=dace.float32) # Initialize arrays: Randomize A, zero B A[:] = dace.float32(0) A[2:H.get() - 2, 2:W.get() - 2] = 1 regression = np.ndarray([H.get() - 4, W.get() - 4], dtype=np.float32) regression[:] = A[2:H.get() - 2, 2:W.get() - 2] ############################################# # Run DaCe program if args["specialize_all"]: jacobi(A=A) else: jacobi(A=A, H=H, T=T)
parser.add_argument("-specialize", default=False, action="store_true", help="Fix all symbols at compile time/in hardware") args = vars(parser.parse_args()) W.set(args["W"]) H.set(args["H"]) nnz.set(args["nnz"]) print( 'Sparse Matrix-Vector Multiplication {}x{} ({} non-zero elements, {}specialized)' .format(W.get(), H.get(), nnz.get(), "not " if not args["specialize"] else "")) A_row = dace.ndarray([H + 1], dtype=itype) A_col = dace.ndarray([nnz], dtype=itype) A_val = dace.ndarray([nnz], dtype=dtype) x = dace.ndarray([W], dtype) b = dace.ndarray([H], dtype) # Assuming uniform sparsity distribution across rows nnz_per_row = nnz.get() // H.get() nnz_last_row = nnz_per_row + (nnz.get() % H.get()) if nnz_last_row > W.get(): print('Too many nonzeros per row') exit(1) # RANDOMIZE SPARSE MATRIX A_row[0] = itype(0)
sdfg.add_edge(s6_else, s9, dace.InterstateEdge())
sdfg.add_edge(s9, end, dace.InterstateEdge())

# Generate the code once; the checks below search the text of the first
# generated code object for the expected control-flow constructs.
code = sdfg.generate_code()[0].code

# The patterns are raw strings so that `\s` reaches the regex engine exactly
# as written. In a plain string literal `\s` is an invalid string escape
# that newer Python versions warn about. The pattern values are unchanged.
for_pattern = r"for.*i\s*=\s*0.*i\s*<\s*16"
if re.search(for_pattern, code) is None:
    raise RuntimeError("For loop not detected in state transitions")

while_pattern = r"while.+i\s*<\s*128"
if re.search(while_pattern, code) is None:
    raise RuntimeError("While loop not detected in state transitions")

if_pattern = r"if.+i\s*<\s*512"
if re.search(if_pattern, code) is None:
    raise RuntimeError("If not detected in state transitions")

else_pattern = r"}\s*else\s*{"
if re.search(else_pattern, code) is None:
    raise RuntimeError("Else not detected in state transitions")

# Run the SDFG with a one-element output initialized to zero, then check
# that the value written to `x` is 128.
x_output = dace.ndarray([1], int)
x_output[0] = 0
sdfg(x=x_output)
x_output = x_output[0]
if x_output != 128:
    raise RuntimeError("Expected x = 128, got {}".format(x_output))
import dace as dp
import numpy as np

# Symbolic length of the index and output vectors.
W = dp.symbol('W')


# Gathers from A through the index vector x: B[i] = A[x[i]] for i in [0, W).
@dp.program
def indirection(A, x, B):
    @dp.map(_[0:W])
    def ind(i):
        # The read location in A is itself read from x.
        bla << A[x[i]]
        out >> B[i]
        out = bla


if __name__ == '__main__':
    W.set(5)

    # A holds W*W elements; B and x hold W elements each.
    A = dp.ndarray([W * W])
    B = dp.ndarray([W])
    x = dp.ndarray([W], dtype=dp.uint32)

    # A = 10, 11, ..., 10 + W*W - 1; B is zeroed; x holds W random indices
    # drawn from [0, W*W), so every index is a valid position in A.
    A[:] = np.arange(10, 10 + W.get() * W.get())
    B[:] = dp.float32(0)
    x[:] = np.random.randint(0, W.get() * W.get(), W.get())

    indirection(A, x, B)

    # Print the indices used and the values gathered.
    print(x.view(type=np.ndarray))
    print(B.view(type=np.ndarray))
@dace.map(_[0:W]) def compute2(i): a << tmp[i] b >> BB[i] b = a + 1 @dace.program def prog(A, B, C): bla(A, B) bla(B, C) if __name__ == '__main__': W.set(3) A = dace.ndarray([W]) B = dace.ndarray([W]) C = dace.ndarray([W]) A[:] = np.mgrid[0:W.get()] B[:] = dace.float32(0.0) C[:] = dace.float32(0.0) prog(A, B, C) diff = np.linalg.norm((-(-A + 1) + 1) - C) / W.get() print("Difference:", diff) print("==== Program end ====") exit(0 if diff <= 1e-5 else 1)
def a2b(y, x): input << A[y - 1:y + 2, x - 1:x + 2] out >> B[y, x] out = (kernel[0, 0] * input[0, 0] + kernel[0, 1] * input[0, 1] + kernel[0, 2] * input[0, 2] + kernel[1, 0] * input[1, 0] + kernel[1, 1] * input[1, 1] + kernel[1, 2] * input[1, 2] + kernel[2, 0] * input[2, 0] + kernel[2, 1] * input[2, 1] + kernel[2, 2] * input[2, 2]) if __name__ == "__main__": print("==== Program start ====") print('Conv2D %dx%d' % (N.get(), N.get())) A = dace.ndarray([N, N], dtype=dace.float32) B = dace.ndarray([N, N], dtype=dace.float32) # Initialize arrays: Randomize A, zero B A[:] = dace.float32(0) B[:] = dace.float32(0) A[1:N.get() - 1, 1:N.get() - 1] = np.random.rand(dace.eval(N - 2), dace.eval(N - 2)).astype( dace.float32.type) regression = np.ndarray([N.get() - 2, N.get() - 2], dtype=np.float32) regression[:] = A[1:N.get() - 1, 1:N.get() - 1] #print(A.view(type=np.ndarray)) ############################################# # Run DaCe program
def test():
    """Run ``myprint`` on a 10x10 float32 array of random values.

    The array is filled from ``np.random.rand`` and handed to ``myprint``
    together with its two dimensions.
    """
    rows, cols = 10, 10
    data = dace.ndarray([rows, cols], dtype=dace.float32)
    data[:] = np.random.rand(rows, cols).astype(dace.float32.type)
    myprint(data, rows, cols)
}, { M: 116, N: 124, }, { M: 390, N: 410, }, { M: 1900, N: 2100, }, { M: 1800, N: 2200, }] args = [ dace.ndarray([M, N], datatype), dace.ndarray([N], datatype), dace.ndarray([N], datatype) ] def init_array(A, x, y): n = N.get() m = M.get() fn = datatype(n) for i in range(n): x[i] = 1 + (i / fn) for i in range(m): for j in range(n): A[i, j] = datatype((i + j) % n) / (5 * m)
"write_map", {"i": "0:N"}, schedule=dace.dtypes.ScheduleType.Sequential) write_tasklet = state.add_tasklet("write", {"from_stream"}, {"to_memory"}, "to_memory = from_stream") # Inner edges state.add_edge(write_entry, None, write_tasklet, "from_stream", dace.memlet.Memlet.simple(fifo_out_1, "0")) state.add_edge(write_tasklet, "to_memory", write_exit, None, dace.memlet.Memlet.simple(array_out, "i")) # Outer edges state.add_edge(fifo_out_1, None, write_entry, None, dace.memlet.Memlet.simple(fifo_out_1, "0")) state.add_edge(write_exit, None, array_out, None, dace.memlet.Memlet.simple(array_out, "0:N")) ########################################################################### N.set(1024) array_in = dace.ndarray([2 * N], dace.dtypes.int32) array_in[:N.get()] = range(0, N.get()) array_in[N.get():] = range(0, N.get()) array_out = dace.ndarray([N], dace.dtypes.int32) sdfg(array_in=array_in, array_out=array_out, N=N) for i, val in enumerate(array_out): if val != 2 * i: print(i, val) raise ValueError
import math import dace import polybench import numpy as np N = dace.symbol('N') #datatypes = [dace.float64, dace.int32, dace.float32] datatype = dace.float64 # Dataset sizes sizes = [{N: 40}, {N: 120}, {N: 400}, {N: 2000}, {N: 4000}] args = [dace.ndarray([N, N], datatype)] def init_array(A): n = N.get() for i in range(0, n, 1): for j in range(0, i + 1, 1): # Python does modulo, while C does remainder ... A[i, j] = datatype(-(j % n)) / n + 1 for j in range(i + 1, n, 1): A[i, j] = datatype(0) A[i, i] = datatype(1) A[:] = np.dot(A, np.transpose(A)) @dace.program(datatype[N, N])
if args["specialize"]: print("Specializing M...") M.set(args["M"]) gemv = make_sdfg(args["specialize"]) gemv.specialize(dict(N=N)) if not args["specialize"]: M.set(args["M"]) else: gemv.specialize(dict(M=M)) print("Running GEMV {}x{} ({}specialized)".format( N.get(), M.get(), ("" if args["specialize"] else "not "))) A = dace.ndarray([M, N], dtype=dtype) x = dace.ndarray([M], dtype=dtype) y = dace.ndarray([N], dtype=dtype) # Intialize: randomize A, x and y # A[:, :] = np.random.rand(M.get(), N.get()).astype(dtype.type) # x[:] = np.random.rand(M.get()).astype(dtype.type) # y[:] = np.random.rand(N.get()).astype(dtype.type) A[:, :] = 1 x[:] = 1 y[:] = 0 # Regression regression = np.matmul(np.transpose(A), x) + y #############################################
import dace import numpy as np import dace.frontend.common as np_frontend import os from timeit import default_timer as timer SDFG = dace.sdfg.SDFG M = dace.symbol('M') N = dace.symbol('N') K = dace.symbol('K') L = dace.symbol('L') O = dace.symbol('O') alpha = dace.ndarray([L, O], dtype=dace.float64) A = dace.ndarray([M, N, K], dtype=dace.float64) B = dace.ndarray([M, N, K], dtype=dace.float64) if __name__ == "__main__": print("==== Program start ====") parser = argparse.ArgumentParser() parser.add_argument("M", type=int, nargs="?", default=128) parser.add_argument("N", type=int, nargs="?", default=128) parser.add_argument("K", type=int, nargs="?", default=128) parser.add_argument("L", type=int, nargs="?", default=5) parser.add_argument("O", type=int, nargs="?", default=10) args = vars(parser.parse_args()) M.set(args["M"])
if args["specialize"]: H.set(args["H"]) W.set(args["W"]) histogram = make_sdfg(True) histogram.specialize(dict(H=H, W=W, num_bins=num_bins)) else: histogram = make_sdfg(False) histogram.specialize(dict(num_bins=num_bins)) H.set(args["H"]) W.set(args["W"]) print("Histogram {}x{} ({}specialized)".format( H.get(), W.get(), "" if args["specialize"] else "not ")) A = dace.ndarray([H, W], dtype=dtype) hist = dace.ndarray([num_bins], dtype=dace.uint32) A[:] = np.random.rand(H.get(), W.get()).astype(dace.float32.type) hist[:] = dace.uint32(0) if args["specialize"]: histogram(A=A, hist=hist) else: histogram(A=A, H=H, W=W, hist=hist) if dace.Config.get_bool('profiling'): dace.timethis('histogram', 'numpy', (H.get() * W.get()), np.histogram, A, num_bins) diff = np.linalg.norm(