def fibonacci(iv: dace.int32[1], res: dace.float32[1]):
    # Accumulates a Fibonacci-style recursion into `res` using a stream as a
    # work queue: the input value is pushed first, then every consumed value
    # either contributes 1 to `res` (value == 1) or pushes its two smaller
    # predecessors (value > 1). Values below 1 produce no output.

    # Unbounded stream acting as the work queue
    queue = dace.define_stream(dace.int32, 0)

    # Seed the queue with the input value
    with dace.tasklet:
        seed << iv
        first >> queue
        first = seed

    # Four processing elements consume the queue in parallel; the scope may
    # push new work onto the very queue it is draining
    @dace.consume(queue, 4)
    def scope(elem, p):
        # Dynamic outgoing memlet (volume -1) back onto the queue
        push >> queue(-1)
        # Dynamic-volume write to `res`; conflicting writes are summed
        acc >> res(-1, lambda a, b: a + b)

        # Every assignment to `push` enqueues one more value
        if elem == 1:
            # Recursion bottoms out: contribute 1 to the result
            acc = 1
        elif elem > 1:
            # Recurse by enqueueing the two smaller values
            push = elem - 1
            push = elem - 2
def pbf(
        A: dace.float32[N],
        out: dace.float32[N],
        outsz: dace.uint32[1],
        ratio: dace.float32):
    # Filters `A` by a predicate: every element greater than `ratio` is pushed
    # to `out`, and the predicate values are summed into `outsz`.

    # A stream behaves like a queue, which lets the map push a data-dependent
    # number of values towards `out`
    kept = dace.define_stream(dace.float32, N)

    # Each map iteration inspects exactly one element of `A`
    for idx in dace.map[0:N]:
        with dace.tasklet:
            elem << A[idx]
            threshold << ratio

            # Predicate deciding whether this element passes the filter
            passes = (elem > threshold)

            # Assigning to `pushed` enqueues the element on the stream
            if passes:
                pushed = elem

            # Via write-conflict resolution the predicate is added to `outsz`
            count = passes

            # Dynamic output memlet (annotated with -1) into the stream
            pushed >> kept(-1)
            # Dynamic write to the output size, combined by summation.
            # NOTE(review): the trailing 0 is presumably the reduction
            # identity -- confirm against the memlet API.
            count >> outsz(-1, lambda x, y: x + y, 0)

    # Connect the stream to the output array; DaCe recognizes this pattern
    # and emits code that pushes results to `out` directly
    kept >> out
def program(A, B):
    # Moves data from `A` to `B` through an input stream: `A` is connected to
    # the stream, and each map iteration reads from the stream with a dynamic
    # (-1) memlet and stores the value in `B[idx]`.
    # NOTE(review): `N` is defined outside this function; which stream element
    # a given iteration receives is not determined by this code -- confirm.
    source = dace.define_stream(dace.float32, N)
    source << A

    for idx in dace.map[0:N]:
        with dace.tasklet:
            popped << source(-1)
            dst >> B[idx]
            dst = popped
def program(A, B):
    # Moves data from `A` to `B` through an output stream: each map iteration
    # reads `A[idx]` and pushes it onto the stream with a dynamic (-1) memlet;
    # the stream is then connected to `B`.
    # NOTE(review): `N` is defined outside this function.
    sink = dace.define_stream(dace.float32, N)

    for idx in dace.map[0:N]:
        with dace.tasklet:
            src << A[idx]
            pushed >> sink(-1)
            pushed = src

    sink >> B
def program(A, B):
    # Pushes the constant 42.0 onto a stream once per map iteration and then
    # connects the stream to `B`.
    # NOTE(review): `N` is defined outside this function.
    constants = dace.define_stream(dace.float32, N)

    for idx in dace.map[0:N]:
        with dace.tasklet:
            # Input connector on `A[idx]` is declared, but its value is never
            # used in the tasklet body
            src << A[idx]
            pushed >> constants(-1)
            pushed = 42.0

    constants >> B
def transients(A: dace.float32[10]):
    # Uses three locally defined containers -- a stream, a scalar and an
    # array -- to collect the elements of `A` that are >= 0.5.
    # Returns the counter scalar and the local array fed by the stream.
    passed = dace.define_stream(dace.float32, 10)
    count = dace.define_local_scalar(dace.int32)
    collected = dace.define_local([10], dace.float32)

    # Start from a zeroed array and a zero counter
    collected[:] = 0
    count = 0

    for idx in dace.map[0:10]:
        if A[idx] >= 0.5:
            # Dynamic (-1) push of the qualifying element onto the stream
            A[idx] >> passed(-1)
            count += 1

    # Connect the stream to the local array
    passed >> collected
    return count, collected
def test():
    """Fill a single stream and a 2x2 stream array, then print two queues.

    The values 0..5 are appended to element 0 of the single stream. For each
    value ``v`` and column ``c`` in {0, 1}, ``v + c`` is appended to row 0 and
    ``v + c * 10`` to row 1 of the stream array. Afterwards element 0 of the
    single stream and element ``[1, 1]`` of the array are emptied from the
    left, printing each popped value.
    """
    single = dace.define_stream()
    grid = dace.define_streamarray([2, 2])

    for value in range(6):
        single[0].append(value)
        for col in range(2):
            grid[0, col].append(value + col)
            grid[1, col].append(value + col * 10)

    # Pop from the left end until each queue is empty
    while len(single[0]) > 0:
        print(single[0].popleft())
    while len(grid[1, 1]) > 0:
        print(grid[1, 1].popleft())
def transients(A: dace.float32[n]):
    # Uses three locally defined containers -- a stream, a scalar and an
    # array -- to collect the elements of `A` that are >= 0.5.
    # Returns the counter scalar and the local array fed by the stream.
    # NOTE(review): `n` is defined outside this function.
    passed = dace.define_stream(dace.float32, n)
    count = dace.define_local_scalar(dace.int32)
    collected = dace.define_local([n], dace.float32)

    # Start from a zeroed array and a zero counter
    collected[:] = 0
    count = 0

    for idx in dace.map[0:n]:
        if A[idx] >= 0.5:
            # Dynamic (-1) push of the qualifying element onto the stream
            A[idx] >> passed(-1)
            # Increment the counter through a tasklet whose output memlet
            # sums conflicting writes
            with dace.tasklet:
                inc >> count(1, lambda a, b: a + b)
                inc = 1

    # Connect the stream to the local array
    passed >> collected
    return count, collected
def pbf(A, out, outsz, ratio):
    # Filters `A` by a predicate: every element greater than `ratio` is pushed
    # through a stream to `out`, and 1 is added to `outsz` for each of them.
    # NOTE(review): `N` and `_` are defined outside this function.
    kept = dace.define_stream(dace.float32, N)

    @dace.map(_[0:N])
    def filter(i):
        elem << A[i]
        threshold << ratio
        # Dynamic (-1) output memlets: a write happens only when the
        # predicate below holds
        pushed >> kept(-1)
        # Conflicting writes to `outsz` are combined by summation
        count >> outsz(-1, lambda x, y: x + y, 0)

        if elem > threshold:
            pushed = elem
            count = 1

    # Connect the stream to the output array
    kept >> out
def pbf(A, out, outsz, ratio):
    # Filters `A` by a predicate: every element greater than `ratio` is pushed
    # through a stream to `out`, and the predicate values are summed into
    # `outsz`.
    # NOTE(review): `N` and `_` are defined outside this function.
    kept = dace.define_stream(dace.float32, 1)
    # The stream is connected to the output array before the map is declared
    kept >> out

    @dace.map(_[0:N])
    def filter(i):
        elem << A[i]
        # Dynamic (-1) output memlet into the stream
        pushed >> kept(-1)
        # Conflicting writes to `outsz` are combined by summation
        count >> outsz(-1, lambda x, y: x + y, 0)

        # `ratio` is read directly here, without an input memlet
        passes = (elem > ratio)

        if passes:
            pushed = elem

        count = passes
def test():
    """Check the left-to-right drain order of a stream and a stream array.

    The values 0..5 are appended to element 0 of the single stream. For each
    value ``v`` and column ``c`` in {0, 1}, ``v + c`` is appended to row 0 and
    ``v + c * 10`` to row 1 of the 2x2 stream array. Element 0 of the single
    stream and element ``[1, 1]`` of the array are then emptied from the left,
    and the collected values must be 0..5 followed by 10..15.
    """
    single = dace.define_stream()
    grid = dace.define_streamarray([2, 2])

    for value in range(6):
        single[0].append(value)
        for col in range(2):
            grid[0, col].append(value + col)
            grid[1, col].append(value + col * 10)

    # Pop from the left end until each queue is empty
    drained = []
    while len(single[0]) > 0:
        drained.append(single[0].popleft())
    while len(grid[1, 1]) > 0:
        drained.append(grid[1, 1].popleft())

    expected = list(range(6)) + list(range(10, 16))
    assert drained == expected
def fibonacci(iv: dp.int32[1], res: dp.float32[1]):
    # Accumulates a Fibonacci-style recursion into `res` using a stream as a
    # work queue: the input value is pushed first, then every consumed value
    # either contributes 1 to `res` (value == 1) or pushes its two smaller
    # predecessors (value > 1). Values below 1 produce no output.
    # NOTE(review): the second argument (500) is presumably the stream's
    # buffer size -- confirm against the `define_stream` API.
    queue = dp.define_stream(dp.int32, 500)

    # Seed the queue with the input value
    @dp.tasklet
    def init():
        seed << iv
        first >> queue
        first = seed

    @dp.consume(queue, 4)
    def cons(elem, p):
        # Dynamic (-1) outgoing memlet back onto the queue
        push >> queue(-1)
        # Dynamic write to element 0 of `res`; conflicting writes are summed
        acc >> res(-1, lambda a, b: a + b)[0]

        if elem == 1:
            acc = 1
        elif elem > 1:
            # Recurse by enqueueing the two smaller values
            push = elem - 1
            push = elem - 2
import dace

# Script entry point: fills a single stream and a 2x2 stream array, then
# prints the contents of two of the queues.
if __name__ == '__main__':
    single = dace.define_stream()
    grid = dace.define_streamarray([2, 2])

    # Append 0..5 to the single stream; for each value and column, append
    # `value + col` to row 0 and `value + col * 10` to row 1 of the array
    for value in range(6):
        single[0].append(value)
        for col in range(2):
            grid[0, col].append(value + col)
            grid[1, col].append(value + col * 10)

    # Pop from the left end until each queue is empty, printing every value
    while len(single[0]) > 0:
        print(single[0].popleft())
    while len(grid[1, 1]) > 0:
        print(grid[1, 1].popleft())