def create_test_sdfg():
    """Build a two-state SDFG that max-reduces ``BETA`` into ``BETA_MAX``.

    The ``init`` state initializes ``BETA_MAX`` through
    ``create_zero_initialization``; the ``compute`` state contains a reduce
    node (axis 0, max) whose implementation is set to ``'CUDA (device)'``.

    :return: The constructed SDFG.
    """
    graph = dace.SDFG('test_sdfg')
    graph.add_array('BETA', shape=[10], dtype=dace.float32)
    graph.add_array('BETA_MAX', shape=[1], dtype=dace.float32)

    init = graph.add_state("init")
    compute = graph.add_state("compute")
    graph.add_edge(init, compute, dace.InterstateEdge())

    # Only the reduction target needs to be initialized.
    create_zero_initialization(init, 'BETA_MAX')

    result_node = compute.add_access('BETA_MAX')
    input_node = compute.add_access('BETA')

    reduce_node = compute.add_reduce(wcr="lambda a, b: max(a, b)",
                                     axes=(0, ),
                                     identity=-999999)
    reduce_node.implementation = 'CUDA (device)'

    input_memlet = dace.memlet.Memlet.simple(input_node.data, '0:10')
    compute.add_edge(input_node, None, reduce_node, None, input_memlet)
    output_memlet = dace.memlet.Memlet.simple(result_node.data, '0:1')
    compute.add_edge(reduce_node, None, result_node, None, output_memlet)

    return graph
def test_allocation_static():
    """
    Constant propagation must resolve the symbolic size of a transient array
    when the symbol is assigned a constant on an interstate edge.
    """
    sdfg = dace.SDFG('cprop_static_alloc')
    size = dace.symbol('N', dace.int32)
    sdfg.add_symbol('N', dace.int32)
    sdfg.add_array('tmp', [size], dace.int32, transient=True)
    sdfg.add_array('output', [1], dace.int32)

    entry_state = sdfg.add_state()
    write_state = sdfg.add_state()
    output_state = sdfg.add_state_after(write_state)

    # Entering the second state sets N=1
    sdfg.add_edge(entry_state, write_state,
                  dace.InterstateEdge(assignments=dict(N=1)))
    tasklet = write_state.add_tasklet('somecode', {}, {'out'}, 'out = 2')
    tmp_write = write_state.add_write('tmp')
    write_state.add_edge(tasklet, 'out', tmp_write, None, dace.Memlet('tmp'))

    # The last state copies the value to the output
    tmp_read = output_state.add_read('tmp')
    out_write = output_state.add_write('output')
    output_state.add_nedge(tmp_read, out_write, dace.Memlet('tmp[0]'))

    # Apply only constant propagation (no scalar-to-symbol promotion)
    ConstantPropagation().apply_pass(sdfg, {})

    assert len(sdfg.symbols) == 0

    val = np.random.rand(1).astype(np.int32)
    sdfg(output=val)
    assert np.allclose(val, 2)
def test_two_to_one_cc_fusion():
    """
    Fuse two states where the first has two connected components and the
    second has a single one.
    """
    sdfg = dace.SDFG('state_fusion_test')
    for array_name in ('A', 'B', 'C'):
        sdfg.add_array(array_name, [1], dace.int32)

    state1 = sdfg.add_state()
    state2 = sdfg.add_state()
    sdfg.add_edge(state1, state2, dace.InterstateEdge())

    # State 1, component 1: write A
    writer_a = state1.add_tasklet('one', {}, {'a'}, 'a = 1')
    state1.add_edge(writer_a, 'a', state1.add_write('A'), None,
                    dace.Memlet('A'))
    # State 1, component 2: write B and C
    writer_bc = state1.add_tasklet('two', {}, {'b', 'c'}, 'b = 2; c = 3')
    state1.add_edge(writer_bc, 'b', state1.add_write('B'), None,
                    dace.Memlet('B'))
    state1.add_edge(writer_bc, 'c', state1.add_write('C'), None,
                    dace.Memlet('C'))

    # State 2: one component reading A, B, C and writing C
    adder = state2.add_tasklet('three', {'a', 'b', 'c'}, {'out'},
                               'out = a+b+c')
    for array_name, connector in (('A', 'a'), ('B', 'b'), ('C', 'c')):
        state2.add_edge(state2.add_read(array_name), None, adder, connector,
                        dace.Memlet(array_name))
    state2.add_edge(adder, 'out', state2.add_write('C'), None,
                    dace.Memlet('C'))

    num_applied = sdfg.apply_transformations_repeated(StateFusion,
                                                      strict=True)
    assert num_applied == 1
def make_sdfg(specialized):
    """Assemble the streaming FPGA matrix-multiplication SDFG.

    :param specialized: If truthy, the SDFG name embeds the values of N, K
                        and M; otherwise only M's value is embedded.
    :return: SDFG with copy-to-FPGA, compute and copy-to-host states chained
             in that order.
    """
    if specialized:
        name = "mm_fpga_stream_{}x{}x{}".format(N.get(), K.get(), M.get())
    else:
        name = "mm_fpga_stream_NxKx{}".format(M.get())
    sdfg = dace.SDFG(name)

    to_device = make_copy_to_fpga_state(sdfg)
    compute = make_fpga_state(sdfg)
    to_host = make_copy_to_host_state(sdfg)

    sdfg.add_edge(to_device, compute, dace.InterstateEdge())
    sdfg.add_edge(compute, to_host, dace.InterstateEdge())

    return sdfg
def test_two_cc_fusion_together():
    """
    Fuse two states that each have two connected components; the fused
    state is expected to form one connected component.
    """
    sdfg = dace.SDFG('state_fusion_test')
    for array_name in ('A', 'B', 'C'):
        sdfg.add_array(array_name, [1], dace.int32)

    state1 = sdfg.add_state()
    state2 = sdfg.add_state()
    sdfg.add_edge(state1, state2, dace.InterstateEdge())

    # State 1, component 1: write A
    writer_a = state1.add_tasklet('one', {}, {'a'}, 'a = 1')
    state1.add_edge(writer_a, 'a', state1.add_write('A'), None,
                    dace.Memlet('A'))
    # State 1, component 2: write B and C
    writer_bc = state1.add_tasklet('two', {}, {'b', 'c'}, 'b = 2; c = 3')
    state1.add_edge(writer_bc, 'b', state1.add_write('B'), None,
                    dace.Memlet('B'))
    state1.add_edge(writer_bc, 'c', state1.add_write('C'), None,
                    dace.Memlet('C'))

    # State 2, component 1: consume B
    b_reader = state2.add_read('B')
    sink = state2.add_tasklet('one', {'a'}, {}, '')
    state2.add_edge(b_reader, None, sink, 'a', dace.Memlet('B'))
    # State 2, component 2: read A and C, write A and C
    mixer = state2.add_tasklet('two', {'b', 'c'}, {'d', 'e'},
                               'd = b + c; e = b')
    state2.add_edge(state2.add_read('A'), None, mixer, 'b', dace.Memlet('A'))
    state2.add_edge(state2.add_read('C'), None, mixer, 'c', dace.Memlet('C'))
    state2.add_edge(mixer, 'd', state2.add_write('A'), None, dace.Memlet('A'))
    state2.add_edge(mixer, 'e', state2.add_write('C'), None, dace.Memlet('C'))

    num_applied = sdfg.apply_transformations_repeated(StateFusion)
    assert num_applied == 1
def test_nested_transient(self):
    """ Apply MapFission to a map that wraps a nested SDFG using a transient. """

    # Inner SDFG: b = 3 * (2 * a), staged through transient t
    inner = dace.SDFG('nested')
    inner.add_array('a', [1], dace.float64)
    inner.add_array('b', [1], dace.float64)
    inner.add_transient('t', [1], dace.float64)

    # First inner state: a -> t
    double_state = inner.add_state()
    a_read = double_state.add_read('a')
    doubler = double_state.add_tasklet('t1', {'inp'}, {'out'}, 'out = 2*inp')
    t_write = double_state.add_write('t')
    double_state.add_edge(a_read, None, doubler, 'inp',
                          dace.Memlet.simple('a', '0'))
    double_state.add_edge(doubler, 'out', t_write, None,
                          dace.Memlet.simple('t', '0'))

    # Second inner state: t -> b
    triple_state = inner.add_state()
    t_read = triple_state.add_read('t')
    tripler = triple_state.add_tasklet('t2', {'inp'}, {'out'}, 'out = 3*inp')
    b_write = triple_state.add_write('b')
    triple_state.add_edge(t_read, None, tripler, 'inp',
                          dace.Memlet.simple('t', '0'))
    triple_state.add_edge(tripler, 'out', b_write, None,
                          dace.Memlet.simple('b', '0'))

    inner.add_edge(double_state, triple_state, dace.InterstateEdge())

    # Outer SDFG: map over both elements of A, in place
    sdfg = dace.SDFG('nested_transient_fission')
    sdfg.add_array('A', [2], dace.float64)
    state = sdfg.add_state()
    outer_read = state.add_read('A')
    outer_write = state.add_write('A')
    map_entry, map_exit = state.add_map('outer', dict(i='0:2'))
    nested_node = state.add_nested_sdfg(inner, None, {'a'}, {'b'})
    state.add_memlet_path(outer_read,
                          map_entry,
                          nested_node,
                          dst_conn='a',
                          memlet=dace.Memlet.simple('A', 'i'))
    state.add_memlet_path(nested_node,
                          map_exit,
                          outer_write,
                          src_conn='b',
                          memlet=dace.Memlet.simple('A', 'i'))

    self.assertGreater(sdfg.apply_transformations(MapFission), 0)

    # Run and compare against 2 * 3 * A
    A = np.random.rand(2)
    expected = A * 6
    sdfg(A=A)
    self.assertTrue(np.allclose(A, expected))
def test_allocation_varying(parametric):
    """
    Two transient arrays share the symbolic size N, which is reassigned
    between states. Checks that constant propagation keeps the symbol and
    that the program still produces the expected value.

    :param parametric: If truthy, the second assignment of N reads
                       ``tmp1[0]``; otherwise it is the constant 2.
    """
    sdfg = dace.SDFG(f'cprop_alloc_{parametric}')
    size = dace.symbol('N', dace.int32)
    sdfg.add_symbol('N', dace.int32)
    sdfg.add_array('tmp1', [size], dace.int32, transient=True)
    sdfg.add_array('tmp2', [size], dace.int32, transient=True)
    sdfg.add_array('output', [1], dace.int32)

    first = sdfg.add_state()
    second = sdfg.add_state()
    third = sdfg.add_state()

    # Entering the second state: N=1, then tmp1[0] is set to 2
    sdfg.add_edge(first, second, dace.InterstateEdge(assignments=dict(N=1)))
    set_two = second.add_tasklet('somecode', {}, {'out'}, 'out = 2')
    tmp1_write = second.add_write('tmp1')
    second.add_edge(set_two, 'out', tmp1_write, None, dace.Memlet('tmp1[0]'))

    # Entering the third state: N=tmp1[0] (=2) or the literal 2
    new_size = 'tmp1[0]' if parametric else 2
    sdfg.add_edge(second, third,
                  dace.InterstateEdge(assignments=dict(N=new_size)))

    set_three = third.add_tasklet('somecode2', {}, {'out'}, 'out = 3')
    set_four = third.add_tasklet('somecode2', {}, {'out'}, 'out = 4')
    tmp2_write = third.add_write('tmp2')
    third.add_edge(set_three, 'out', tmp2_write, None,
                   dace.Memlet('tmp2[0]'))
    third.add_edge(set_four, 'out', tmp2_write, None, dace.Memlet('tmp2[1]'))

    # Copy the second element of tmp2 to the output
    third.add_nedge(tmp2_write, third.add_write('output'),
                    dace.Memlet('tmp2[1]'))

    # Apply only constant propagation (no scalar-to-symbol promotion)
    ConstantPropagation().apply_pass(sdfg, {})

    assert len(sdfg.symbols) == 1

    val = np.random.rand(1).astype(np.int32)
    sdfg(output=val)
    assert np.allclose(val, 4)
def test_nested_promotion_connector(with_subscript):
    """
    Promote a scalar that is passed into a nested SDFG through a connector
    and used there in an interstate assignment.

    :param with_subscript: If truthy, the nested assignment reads ``s[0]``;
                           otherwise it reads ``s``.
    """
    # Outer SDFG
    postfix = 'b' if with_subscript else 'a'
    sdfg = dace.SDFG('testprog14{}'.format(postfix))
    sdfg.add_array('A', [20, 20], dace.float64)
    sdfg.add_array('B', [1], dace.float64)
    sdfg.add_transient('scal', [1], dace.int32)

    initstate = sdfg.add_state()
    init_tasklet = initstate.add_tasklet('do', {}, {'out'}, 'out = 5')
    initstate.add_edge(init_tasklet, 'out', initstate.add_write('scal'), None,
                       dace.Memlet('scal'))
    state = sdfg.add_state_after(initstate)

    # Nested SDFG
    nsdfg = dace.SDFG('nested')
    nsdfg.add_array('a', [20, 20], dace.float64)
    nsdfg.add_array('b', [1], dace.float64)
    nsdfg.add_array('s', [1], dace.int32)
    nsdfg.add_symbol('s2', dace.int32)
    nstate1 = nsdfg.add_state()
    nstate2 = nsdfg.add_state()
    scalar_expr = 's[0]' if with_subscript else 's'
    nsdfg.add_edge(nstate1, nstate2,
                   dace.InterstateEdge(assignments=dict(s2=scalar_expr)))

    inner_read = nstate2.add_read('a')
    copy_tasklet = nstate2.add_tasklet('do', {'inp'}, {'out'}, 'out = inp')
    inner_write = nstate2.add_write('b')
    nstate2.add_edge(inner_read, None, copy_tasklet, 'inp',
                     dace.Memlet('a[s2, s2 + 1]'))
    nstate2.add_edge(copy_tasklet, 'out', inner_write, None,
                     dace.Memlet('b[0]'))

    # Connect the nested SDFG inside the outer state
    nnode = state.add_nested_sdfg(nsdfg, None, {'a', 's'}, {'b'})
    aouter = state.add_read('A')
    souter = state.add_read('scal')
    bouter = state.add_write('B')
    state.add_edge(aouter, None, nnode, 'a', dace.Memlet('A'))
    state.add_edge(souter, None, nnode, 's', dace.Memlet('scal'))
    state.add_edge(nnode, 'b', bouter, None, dace.Memlet('B'))

    #######################################################
    # Promotion
    promotable = scalar_to_symbol.find_promotable_scalars(sdfg)
    assert promotable == {'scal'}
    scalar_to_symbol.promote_scalars_to_symbols(sdfg)
    sdfg.coarsen_dataflow()

    assert sdfg.number_of_nodes() == 1
    only_state = sdfg.node(0)
    assert only_state.number_of_nodes() == 3
    assert not any(
        isinstance(n, dace.nodes.NestedSDFG) for n in sdfg.node(0))

    # Correctness
    A = np.random.rand(20, 20)
    B = np.random.rand(1)
    sdfg(A=A, B=B)
    assert B[0] == A[5, 6]
def test_sub_grid():
    """
    Create a [1, P] Cartesian process grid, take a sub-grid that keeps only
    the second dimension, and query it with ``MPI_Cart_get`` in a C++ tasklet.

    :raises ValueError: If run with fewer than two MPI processes.
    """
    P = dace.symbol('P', dace.int32)

    sdfg = dace.SDFG("sub_grid_test")
    sdfg.add_symbol('P', dace.int32)
    _, dims_desc = sdfg.add_array("dims", (1, ), dtype=dace.int32)
    _, periods_desc = sdfg.add_array("periods", (1, ), dtype=dace.int32)
    _, coords_desc = sdfg.add_array("coords", (1, ), dtype=dace.int32)
    sdfg.add_array("valid", (1, ), dtype=dace.bool_)

    # First state: create the parent grid and its sub-grid
    state = sdfg.add_state("start")
    parent_pgrid_name = comm._cart_create(None, sdfg, state, [1, P])
    pgrid_name = comm._cart_sub(None, sdfg, state, parent_pgrid_name,
                                [False, True])

    # Second state: query the sub-grid
    state2 = sdfg.add_state("main")
    sdfg.add_edge(state, state2, dace.InterstateEdge())
    tasklet = state2.add_tasklet(
        "MPI_Cart_get", {}, {'d', 'p', 'c', 'v'},
        f"MPI_Cart_get(__state->{pgrid_name}_comm, P, &d, &p, &c);\nv = __state->{pgrid_name}_valid;",
        dtypes.Language.CPP)
    dims_node = state2.add_write("dims")
    periods_node = state2.add_write("periods")
    coords_node = state2.add_write("coords")
    valid_node = state2.add_write("valid")
    state2.add_edge(tasklet, 'd', dims_node, None,
                    dace.Memlet.from_array("dims", dims_desc))
    state2.add_edge(tasklet, 'p', periods_node, None,
                    dace.Memlet.from_array("periods", periods_desc))
    state2.add_edge(tasklet, 'c', coords_node, None,
                    dace.Memlet.from_array("coords", coords_desc))
    state2.add_edge(tasklet, 'v', valid_node, None, dace.Memlet("valid[0]"))

    from mpi4py import MPI
    commworld = MPI.COMM_WORLD
    rank = commworld.Get_rank()
    size = commworld.Get_size()

    if size < 2:
        raise ValueError("Please run this test with at least two processes.")

    func = utils.distributed_compile(sdfg, commworld)

    dims = np.zeros((1, ), dtype=np.int32)
    periods = np.zeros((1, ), dtype=np.int32)
    coords = np.zeros((1, ), dtype=np.int32)
    valid = np.zeros((1, ), dtype=np.bool_)

    func(dims=dims, periods=periods, coords=coords, valid=valid, P=size)

    assert np.array_equal(dims, [size])
    assert np.array_equal(periods, [0])
    assert np.array_equal(coords, [rank])
    assert valid[0]
def test():
    """
    Build an SDFG in which a transient is read only by interstate-edge
    conditions and by no dataflow node.
    """
    sdfg = dace.SDFG('toplevel_interstate_test')
    _, tmp_desc = sdfg.add_transient('tmp', [1], dace.int32)

    # First state writes tmp
    set_state = sdfg.add_state()
    setter = set_state.add_tasklet('settmp', {}, {'t'}, 't = 5')
    tmp_write = set_state.add_write('tmp')
    set_state.add_edge(setter, 't', tmp_write, None,
                       dace.Memlet.from_array('tmp', tmp_desc))

    # Branch targets; tmp is only referenced on the edges leading to them
    ok_state = sdfg.add_state()
    ok_state.add_tasklet('sayhi', {}, {}, 'printf("OK\\n")')
    fail_state = sdfg.add_state()
    fail_state.add_tasklet('saybye', {}, {}, 'printf("FAIL\\n")')

    # Conditions that read tmp
    sdfg.add_edge(set_state, ok_state, dace.InterstateEdge('tmp[0] > 2'))
    sdfg.add_edge(set_state, fail_state, dace.InterstateEdge('tmp[0] <= 2'))
def test_state_duplication(self):
    """ Validation must raise InvalidSDFGError for duplicate state labels. """
    try:
        sdfg = dace.SDFG('ok')
        first = sdfg.add_state('also_ok')
        second = sdfg.add_state('also_ok')
        # Set the second label explicitly to the same name as the first
        second.set_label('also_ok')
        sdfg.add_edge(first, second, dace.InterstateEdge())
        sdfg.validate()
        self.fail('Failed to detect duplicate state')
    except dace.sdfg.InvalidSDFGError as ex:
        print('Exception caught:', ex)
def test_fuse_assignments_2():
    """
    Chain of five empty states whose interstate edges repeatedly reassign
    ``k`` (and assign ``l`` from ``k``). No pair of states may be fused, so
    all five states must remain.
    """
    sdfg = dace.SDFG('state_fusion_test')
    states = [sdfg.add_state() for _ in range(5)]

    edge_assignments = [
        dict(k=1),
        dict(k='k + 1'),
        dict(l='k + 1'),
        dict(k='k + 1'),
    ]
    for src, dst, assignments in zip(states, states[1:], edge_assignments):
        sdfg.add_edge(src, dst, dace.InterstateEdge(assignments=assignments))

    sdfg.apply_transformations_repeated(StateFusion)
    assert sdfg.number_of_nodes() == 5
def test_2d_assignment():
    """
    Assign a symbol from a 2D array element on an interstate edge, then
    write that symbol back to another element of the same array.
    """
    sdfg = dace.SDFG('assign2d')
    sdfg.add_array('A', [4, 2], dace.float64)

    first = sdfg.add_state()
    second = sdfg.add_state()

    assign = second.add_tasklet('assign', {}, {'a'}, 'a = i')
    target = second.add_write('A')
    second.add_edge(assign, 'a', target, None, dace.Memlet('A[0, 0]'))

    # i takes the value of A[1, 1] on the transition
    sdfg.add_edge(first, second,
                  dace.InterstateEdge(assignments=dict(i='A[1, 1]')))

    A = np.random.rand(4, 2)
    sdfg(A=A)
    assert np.allclose(A[0, 0], A[1, 1])
def test_dowhile():
    """
    Loop whose condition is checked after the body: the body increments
    A[0] and the guard repeats it while ``cond < 5``.
    """
    sdfg = dace.SDFG('dowhiletest')
    sdfg.add_array('A', [1], dace.int32)

    init = sdfg.add_state()
    body = sdfg.add_state()
    sdfg.add_edge(init, body, dace.InterstateEdge(assignments={'cond': '1'}))
    increment = sdfg.add_state()
    sdfg.add_edge(body, increment,
                  dace.InterstateEdge(assignments={'cond': 'cond + 1'}))
    guard = sdfg.add_state_after(increment)
    after = sdfg.add_state()
    sdfg.add_edge(guard, body, dace.InterstateEdge('cond < 5'))
    sdfg.add_edge(guard, after, dace.InterstateEdge('cond >= 5'))

    # Loop body: A = A + 1
    tasklet = body.add_tasklet('something', {'a'}, {'o'}, 'o = a + 1')
    a_in = body.add_read('A')
    a_out = body.add_write('A')
    body.add_edge(a_in, None, tasklet, 'a', dace.Memlet('A'))
    body.add_edge(tasklet, 'o', a_out, None, dace.Memlet('A'))

    A = np.zeros([1], dtype=np.int32)
    sdfg(A=A)
    assert A[0] == 4
def test_fsm():
    """
    Finite-state machine driven by the scalar ``nextstate``; it can be read
    as a while loop around a switch-case. Each case adds its number to A and
    selects the next case.
    """
    sdfg = dace.SDFG('fsmtest')
    sdfg.add_scalar('nextstate', dace.int32)
    sdfg.add_array('A', [1], dace.int32)

    start = sdfg.add_state()
    dispatch = sdfg.add_state_after(start)
    case0 = sdfg.add_state()
    case1 = sdfg.add_state()
    case3 = sdfg.add_state()
    case5 = sdfg.add_state()
    estate = sdfg.add_state()

    # Transition table: current case -> next case
    fsm = {0: 3, 3: 1, 1: 5, 5: 7}
    cases = [(0, case0), (1, case1), (3, case3), (5, case5)]

    for case, case_state in cases:
        sdfg.add_edge(dispatch, case_state,
                      dace.InterstateEdge(f'nextstate == {case}'))

        a_in = case_state.add_read('A')
        update = case_state.add_tasklet(
            'update', {'ain'}, {'a', 'nstate'},
            f'a = ain + {case}; nstate = {fsm[case]}')
        a_out = case_state.add_write('A')
        next_out = case_state.add_write('nextstate')
        case_state.add_edge(a_in, None, update, 'ain', dace.Memlet('A'))
        case_state.add_edge(update, 'a', a_out, None, dace.Memlet('A'))
        case_state.add_edge(update, 'nstate', next_out, None,
                            dace.Memlet('nextstate'))

        sdfg.add_edge(case_state, estate, dace.InterstateEdge())

    # Back-edge to the dispatching state
    sdfg.add_edge(estate, dispatch, dace.InterstateEdge())

    A = np.array([1], dtype=np.int32)
    sdfg(A=A, nextstate=0)
    assert A[0] == 1 + 3 + 1 + 5

    if dace.Config.get_bool('optimizer', 'detect_control_flow'):
        code = sdfg.generate_code()[0].clean_code
        assert 'switch ' in code
def test_2d_access_sdfgapi():
    """
    Branch on a 2D array element in interstate conditions. The SDFG is
    compiled and run once as built, then again after adding a common end
    state, at which point the generated code must contain ``else``.
    """
    sdfg = dace.SDFG('access2d_sdfg')
    sdfg.add_array('A', [4, 2], dace.float64)

    begin_state = sdfg.add_state()
    state_true = sdfg.add_state()
    state_false = sdfg.add_state()

    set_pos = state_true.add_tasklet('assign', {}, {'a'}, 'a = 100.0')
    state_true.add_edge(set_pos, 'a', state_true.add_write('A'), None,
                        dace.Memlet('A[0, 0]'))
    set_neg = state_false.add_tasklet('assign', {}, {'a'}, 'a = -100.0')
    state_false.add_edge(set_neg, 'a', state_false.add_write('A'), None,
                         dace.Memlet('A[0, 0]'))

    sdfg.add_edge(begin_state, state_true,
                  dace.InterstateEdge('A[1,1] < 0.5'))
    sdfg.add_edge(begin_state, state_false,
                  dace.InterstateEdge('A[1,1] >= 0.5'))

    # Inputs and reference result
    A = np.random.rand(4, 2)
    expected = A.copy()
    if expected[1, 1] < 0.5:
        expected[0, 0] = 100.0
    else:
        expected[0, 0] = -100.0

    # Run 1: branches do not rejoin
    A1 = A.copy()
    csdfg = sdfg.compile()
    csdfg(A=A1)
    assert np.allclose(A1, expected)
    del csdfg

    # Run 2: both branches rejoin in a common end state
    end_state = sdfg.add_state()
    sdfg.add_edge(state_true, end_state, dace.InterstateEdge())
    sdfg.add_edge(state_false, end_state, dace.InterstateEdge())
    assert 'else' in sdfg.generate_code()[0].code

    csdfg = sdfg.compile()
    csdfg(A=A)
    assert np.allclose(A, expected)
def test_fuse_assignment_in_use():
    """
    Two states with an interstate assignment in between, where the assigned
    value is used in the first state. Should fail.

    ``k`` is set to 1 before ``state3`` and reassigned to 2 on the edge from
    ``state3`` to ``state4``. Both states write ``k`` into ``A``, so fusing
    them is expected to be rejected with a ``ValueError``.

    :raises AssertionError: If ``StateFusion.apply_to`` does not raise.
    """
    sdfg = dace.SDFG('state_fusion_test')
    sdfg.add_array('A', [2], dace.int32)
    state1, state2, state3, state4 = tuple(sdfg.add_state() for _ in range(4))
    sdfg.add_edge(state1, state2, dace.InterstateEdge(assignments=dict(k=1)))
    sdfg.add_edge(state2, state3, dace.InterstateEdge())
    sdfg.add_edge(state3, state4, dace.InterstateEdge(assignments=dict(k=2)))

    # First state to fuse: uses k before the reassignment
    state3.add_edge(state3.add_tasklet('one', {}, {'a'}, 'a = k'), 'a',
                    state3.add_write('A'), None, dace.Memlet('A[0]'))

    # Second state to fuse: uses k after the reassignment. The nodes must be
    # created in state4 itself; creating them through state3 would also
    # leave them as stray nodes in the first state.
    state4.add_edge(state4.add_tasklet('two', {}, {'a'}, 'a = k'), 'a',
                    state4.add_write('A'), None, dace.Memlet('A[1]'))

    try:
        StateFusion.apply_to(sdfg, first_state=state3, second_state=state4)
        raise AssertionError('States fused, test failed')
    except ValueError:
        print('Exception successfully caught')
def _make_sdfg(name, storage=dace.dtypes.StorageType.CPU_Heap):
    """Build a loop SDFG whose transient ``tmp1`` has a shape depending on
    the loop variable ``i``.

    Each iteration copies a slice of ``A`` into ``tmp1``, sum-reduces
    ``tmp1`` into ``tmp2`` and copies ``tmp2`` to ``B[i]``.

    :param name: Name of the SDFG.
    :param storage: Storage type of both transients; it also selects the
                    reduce implementation for GPU_Global and FPGA_Global.
    :return: The constructed SDFG.
    """
    N = dace.symbol('N', dtype=dace.int32, integer=True, positive=True)
    i = dace.symbol('i', dtype=dace.int32, integer=True)

    sdfg = dace.SDFG(name)
    sdfg.add_array('A', [N, N, N], dtype=dace.float64)
    sdfg.add_array('B', [N], dtype=dace.float64)
    _, tmp1_desc = sdfg.add_transient('tmp1', [N - 4, N - 4, N - i],
                                      dtype=dace.float64,
                                      storage=storage)
    sdfg.add_transient('tmp2', [1], dtype=dace.float64, storage=storage)

    # Control flow: begin -> guard -> (body1 -> body2 -> body3 -> guard) / end
    begin = sdfg.add_state("begin", is_start_state=True)
    guard = sdfg.add_state("guard")
    copy_in = sdfg.add_state("body1")
    reduce_state = sdfg.add_state("body2")
    copy_out = sdfg.add_state("body3")
    end = sdfg.add_state("end")

    sdfg.add_edge(begin, guard, dace.InterstateEdge(assignments=dict(i='0')))
    sdfg.add_edge(guard, copy_in, dace.InterstateEdge(condition=f'i<{N}'))
    sdfg.add_edge(guard, end, dace.InterstateEdge(condition=f'i>={N}'))
    sdfg.add_edge(copy_in, reduce_state, dace.InterstateEdge())
    sdfg.add_edge(reduce_state, copy_out, dace.InterstateEdge())
    sdfg.add_edge(copy_out, guard,
                  dace.InterstateEdge(assignments=dict(i='i+1')))

    # body1: A slice -> tmp1
    a_node = copy_in.add_read('A')
    tmp1_out = copy_in.add_write('tmp1')
    copy_in.add_nedge(a_node, tmp1_out,
                      dace.Memlet(f'A[2:{N}-2, 2:{N}-2, i:{N}]'))

    # body2: reduce tmp1 -> tmp2
    tmp1_in = reduce_state.add_read('tmp1')
    rednode = standard.Reduce(wcr='lambda a, b : a + b', identity=0)
    if storage == dace.dtypes.StorageType.GPU_Global:
        rednode.implementation = 'CUDA (device)'
    elif storage == dace.dtypes.StorageType.FPGA_Global:
        rednode.implementation = 'FPGAPartialReduction'
    reduce_state.add_node(rednode)
    tmp2_out = reduce_state.add_write('tmp2')
    reduce_state.add_nedge(tmp1_in, rednode,
                           dace.Memlet.from_array('tmp1', tmp1_desc))
    reduce_state.add_nedge(rednode, tmp2_out, dace.Memlet('tmp2[0]'))

    # body3: tmp2 -> B[i]
    tmp2_in = copy_out.add_read('tmp2')
    b_node = copy_out.add_write('B')
    copy_out.add_nedge(tmp2_in, b_node, dace.Memlet('B[i]'))

    return sdfg
def test_recursive_cprop():
    """
    Constant propagation must carry a constant through a nested SDFG's
    symbol mapping into the nested tasklet code.
    """
    sdfg = dace.SDFG('program')
    first = sdfg.add_state()
    second = sdfg.add_state()
    sdfg.add_edge(first, second, dace.InterstateEdge(assignments=dict(i=1)))

    # The nested SDFG sees i mapped to the outer "i + 1"
    nsdfg = dace.SDFG('nested')
    second.add_nested_sdfg(nsdfg, None, {}, {},
                           symbol_mapping={'i': 'i + 1'})

    nstate = nsdfg.add_state()
    printer = nstate.add_tasklet('doprint', {}, {}, 'printf("%d\\n", i)')

    ConstantPropagation().apply_pass(sdfg, {})

    assert len(sdfg.symbols) == 0
    assert len(nsdfg.symbols) == 0
    assert '2' in printer.code.as_string
def test_interstate_edge(self):
    """
    Validation must raise InvalidSDFGInterstateEdgeError when an interstate
    edge assigns to the name '%5'.
    """
    try:
        sdfg = dace.SDFG('ok')
        state = sdfg.add_state('also_ok', is_start_state=True)
        src = state.add_array('A', [1], dace.float32)
        dst = state.add_array('B', [1], dace.float32)
        copy_tasklet = state.add_tasklet('tasklet', {'a'}, {'b'}, 'b = a')
        in_memlet = dace.Memlet.from_array(src.data, src.desc(sdfg))
        state.add_edge(src, None, copy_tasklet, 'a', in_memlet)
        out_memlet = dace.Memlet.from_array(dst.data, dst.desc(sdfg))
        state.add_edge(copy_tasklet, 'b', dst, None, out_memlet)
        # Self-loop carrying the offending assignment
        sdfg.add_edge(state, state,
                      dace.InterstateEdge(assignments={'%5': '1'}))
        sdfg.validate()
        self.fail('Failed to detect invalid interstate edge')
    except dace.sdfg.InvalidSDFGInterstateEdgeError as ex:
        print('Exception caught:', ex)
def test_sae_scalar():
    """
    StateAssignElimination must not apply when the interstate assignment
    reads the transient scalar ``scal``.
    """
    # Construct SDFG
    sdfg = dace.SDFG('state_assign_elimination_test')
    sdfg.add_array('A', [20, 20], dace.float64)
    sdfg.add_array('B', [1], dace.float64)
    sdfg.add_scalar('scal', dace.int32, transient=True)

    initstate = sdfg.add_state()
    init_tasklet = initstate.add_tasklet('do', {}, {'out'}, 'out = 5')
    initstate.add_edge(init_tasklet, 'out', initstate.add_write('scal'), None,
                       dace.Memlet('scal'))

    state = sdfg.add_state()
    sdfg.add_edge(initstate, state,
                  dace.InterstateEdge(assignments=dict(s2='scal')))

    a_read = state.add_read('A')
    copy_tasklet = state.add_tasklet('do', {'inp'}, {'out'}, 'out = inp')
    b_write = state.add_write('B')
    state.add_edge(a_read, None, copy_tasklet, 'inp',
                   dace.Memlet('A[s2, s2 + 1]'))
    state.add_edge(copy_tasklet, 'out', b_write, None, dace.Memlet('B[0]'))

    #######################################################
    num_applied = sdfg.apply_transformations(StateAssignElimination)
    assert num_applied == 0
def test_one_to_two_cc_fusion():
    """
    Fuse two states where the first has one connected component and the
    second has two.
    """
    sdfg = dace.SDFG('state_fusion_test')
    sdfg.add_array('A', [1], dace.int32)
    sdfg.add_array('B', [1], dace.int32)

    state1 = sdfg.add_state()
    state2 = sdfg.add_state()
    sdfg.add_edge(state1, state2, dace.InterstateEdge())

    # State 1: a single tasklet writes both arrays
    producer = state1.add_tasklet('one', {}, {'a', 'b'}, 'a = 1; b = 2')
    state1.add_edge(producer, 'a', state1.add_write('A'), None,
                    dace.Memlet('A'))
    state1.add_edge(producer, 'b', state1.add_write('B'), None,
                    dace.Memlet('B'))

    # State 2: each array is consumed by its own tasklet
    a_read = state2.add_read('A')
    a_sink = state2.add_tasklet('one', {'a'}, {}, '')
    state2.add_edge(a_read, None, a_sink, 'a', dace.Memlet('A'))
    b_read = state2.add_read('B')
    b_sink = state2.add_tasklet('two', {'b'}, {}, '')
    state2.add_edge(b_read, None, b_sink, 'b', dace.Memlet('B'))

    num_applied = sdfg.apply_transformations_repeated(StateFusion)
    assert num_applied == 1
def test_dse_unconditional():
    """
    Three branches leave the first state and one of them has a condition
    that always holds. After DeadStateElimination only that branch's state
    may remain between the first and the end state.
    """
    sdfg = dace.SDFG('dse_tester')
    sdfg.add_symbol('a', dace.int32)

    s = sdfg.add_state()
    s1 = sdfg.add_state()
    s2 = sdfg.add_state()
    s3 = sdfg.add_state()
    e = sdfg.add_state()

    branches = [
        (s1, 'a > 0'),
        (s2, 'a >= a'),  # Always True
        (s3, 'a < 0'),
    ]
    for target, condition in branches:
        sdfg.add_edge(s, target, dace.InterstateEdge(condition))
    for target, _ in branches:
        sdfg.add_edge(target, e, dace.InterstateEdge())

    DeadStateElimination().apply_pass(sdfg, {})
    assert set(sdfg.states()) == {s, s2, e}
def make_sdfg(implementation,
              dtype,
              storage=dace.StorageType.Default,
              data_layout='CCC'):
    """Build an SDFG with a single MatMul library node.

    :param implementation: Not used inside this function.
    :param dtype: Element type of all three matrices.
    :param storage: Storage type of the matrices the node operates on. For
                    any non-default storage these are transients with a
                    ``_device`` suffix, and host arrays plus copy-in and
                    copy-out states are added.
    :param data_layout: Three characters, one each for x, y and the result:
                        'C' for row major; any other character selects
                        column-major strides.
    :return: The constructed SDFG.
    """
    m = dace.symbol("m")
    n = dace.symbol("n")
    k = dace.symbol("k")

    on_device = storage != dace.StorageType.Default
    suffix = "_device" if on_device else ""
    transient = on_device

    sdfg = dace.SDFG("mm_{}_{}".format(dtype.type.__name__, data_layout))
    state = sdfg.add_state("dataflow")

    # Strides per matrix, picked from the layout string
    xstrides = (k, 1) if data_layout[0] == 'C' else (1, m)
    ystrides = (n, 1) if data_layout[1] == 'C' else (1, k)
    zstrides = (n, 1) if data_layout[2] == 'C' else (1, m)

    x_name = "x" + suffix
    y_name = "y" + suffix
    result_name = "result" + suffix

    sdfg.add_array(x_name, [m, k],
                   dtype,
                   storage=storage,
                   transient=transient,
                   strides=xstrides)
    sdfg.add_array(y_name, [k, n],
                   dtype,
                   storage=storage,
                   transient=transient,
                   strides=ystrides)
    sdfg.add_array(result_name, [m, n],
                   dtype,
                   storage=storage,
                   transient=transient,
                   strides=zstrides)

    x = state.add_read(x_name)
    y = state.add_read(y_name)
    result = state.add_write(result_name)

    node = blas.nodes.matmul.MatMul("matmul", dtype)

    state.add_memlet_path(x,
                          node,
                          dst_conn="_a",
                          memlet=Memlet.simple(x, "0:m, 0:k"))
    state.add_memlet_path(y,
                          node,
                          dst_conn="_b",
                          memlet=Memlet.simple(y, "0:k, 0:n"))
    state.add_memlet_path(node,
                          result,
                          src_conn="_c",
                          memlet=Memlet.simple(result, "0:m, 0:n"))

    if on_device:
        # Host-side arrays
        sdfg.add_array("x", [m, k], dtype)
        sdfg.add_array("y", [k, n], dtype)
        sdfg.add_array("result", [m, n], dtype)

        # Copy inputs before the compute state
        init_state = sdfg.add_state("copy_to_device")
        sdfg.add_edge(init_state, state, dace.InterstateEdge())

        x_host = init_state.add_read("x")
        y_host = init_state.add_read("y")
        x_device = init_state.add_write(x_name)
        y_device = init_state.add_write(y_name)
        init_state.add_memlet_path(x_host,
                                   x_device,
                                   memlet=Memlet.simple(x_host, "0:m, 0:k"))
        init_state.add_memlet_path(y_host,
                                   y_device,
                                   memlet=Memlet.simple(y_host, "0:k, 0:n"))

        # Copy the result after the compute state
        finalize_state = sdfg.add_state("copy_to_host")
        sdfg.add_edge(state, finalize_state, dace.InterstateEdge())

        result_device = finalize_state.add_write(result_name)
        result_host = finalize_state.add_read("result")
        finalize_state.add_memlet_path(result_device,
                                       result_host,
                                       memlet=Memlet.simple(
                                           result_device, "0:m, 0:n"))

    return sdfg
src_conn='out', memlet=dace.Memlet('fpga_C[i]')) # add copy to device state copy_to_device = sdfg.add_state('copy_to_device') cpu_a = copy_to_device.add_read('A') cpu_b = copy_to_device.add_read('B') dev_a = copy_to_device.add_write('fpga_A') dev_b = copy_to_device.add_write('fpga_B') copy_to_device.add_memlet_path(cpu_a, dev_a, memlet=dace.Memlet('A[0:N//VECLEN]')) copy_to_device.add_memlet_path(cpu_b, dev_b, memlet=dace.Memlet('B[0:N//VECLEN]')) sdfg.add_edge(copy_to_device, state, dace.InterstateEdge()) # add copy to host state copy_to_host = sdfg.add_state('copy_to_host') dev_c = copy_to_host.add_read('fpga_C') cpu_c = copy_to_host.add_write('C') copy_to_host.add_memlet_path(dev_c, cpu_c, memlet=dace.Memlet('C[0:N//VECLEN]')) sdfg.add_edge(state, copy_to_host, dace.InterstateEdge()) # validate sdfg sdfg.validate() ######################################################################
loopstate1 = sdfg.add_state('loops1') mystate(loopstate1, 'B', 'A') state2 = sdfg.add_state('s2') endstate(state2) # State connection (control flow) # Note: dataflow (arrays) CAN affect control flow assignments and conditions, # but not the other way around (you cannot change an interstate variable # inside a state). The following code works as well: #sdfg.add_edge(state0, guard, dace.InterstateEdge(assigments=dict('k', 'A[0]'))) # Loop initialization (k=0) sdfg.add_edge(state0, guard, dace.InterstateEdge(assignments=dict(k='0'))) # Loop condition (k < T / k >= T) sdfg.add_edge(guard, loopstate0, dace.InterstateEdge('k < T')) sdfg.add_edge(guard, state2, dace.InterstateEdge('k >= T')) # Loop incrementation (k++) sdfg.add_edge(loopstate1, guard, dace.InterstateEdge(assignments=dict(k='k+1'))) # Loop-internal interstate edges sdfg.add_edge(loopstate0, loopstate1, dace.InterstateEdge()) # Validate correctness of initial SDFG sdfg.validate()
def Export_loop(self, multi_stage: MultiStage,
                execution_order: ExecutionOrder):
    """Export a multi-stage as a sequential loop over ``k``.

    One state containing a ``StencilLib`` node is created per do-method and
    the states are chained in order. The chain is then wrapped with
    ``self.sdfg.add_loop``, starting from ``self.last_state_``.

    :param multi_stage: Multi-stage whose stages and do-methods are exported.
    :param execution_order: Compared against
                            ``ExecutionOrder.Forward_Loop.value``: on a match
                            ``k`` counts upwards, otherwise downwards.
    :return: The third element of the tuple returned by ``add_loop``.
    """
    chain_head = None
    chain_tail = None

    for stage in multi_stage.stages:
        for do_method in stage.do_methods:
            read_ids = do_method.ReadIds()
            write_ids = do_method.WriteIds()
            touched_ids = read_ids | write_ids
            global_ids = {
                fid
                for fid in touched_ids if self.id_resolver.IsGlobal(fid)
            }

            # Non-global fields become transients of the top-level SDFG
            self.TryAddArray(self.sdfg,
                             touched_ids - global_ids,
                             transient=True)
            # self.TryAddScalar(self.sdfg, reads & globals)

            halo = ClosedInterval3D(Symbol('halo'), Symbol('halo'),
                                    Symbol('halo'), Symbol('halo'), 0, 0)
            halo -= stage.extents

            shrink_bc = {"btype": "shrink", "halo": halo.to_6_tuple()}
            boundary_conditions = {
                f'{self.Name(fid)}_out': shrink_bc
                for fid in write_ids
            }

            state = self.sdfg.add_state(str(do_method))
            stencil = StencilLib(
                label=str(do_method),
                shape=[I, J, 1],
                accesses=self.Create_Variable_Access_map(
                    do_method.Reads(), '_in'),  # input fields
                output_fields=self.Create_Variable_Access_map(
                    do_method.Writes(), '_out'),  # output fields
                boundary_conditions=boundary_conditions,
                code=do_method.Code())
            stencil.implementation = 'CPU'
            state.add_node(stencil)

            # Inputs: access node -> stencil
            for fid, acc in do_method.read_memlets.items():
                name = self.Name(fid)
                dims = self.Dimensions(fid)
                k_range = f'k+{acc.k.lower}:k+{acc.k.upper+1}'
                subset = ','.join(dim_filter(dims, '0:I', '0:J',
                                             k_range)) or '0'
                state.add_memlet_path(state.add_read(name),
                                      stencil,
                                      memlet=dace.Memlet(f'{name}[{subset}]'),
                                      dst_conn=name + '_in',
                                      propagate=True)

            # Outputs: stencil -> access node
            for fid, acc in do_method.write_memlets.items():
                name = self.Name(fid)
                dims = self.Dimensions(fid)
                k_range = f'k+{acc.k.lower}:k+{acc.k.upper+1}'
                subset = ','.join(dim_filter(dims, '0:I', '0:J',
                                             k_range)) or '0'
                state.add_memlet_path(stencil,
                                      state.add_write(name),
                                      memlet=dace.Memlet(f'{name}[{subset}]'),
                                      src_conn=name + '_out',
                                      propagate=True)

            # Append this state to the chain
            if chain_head is None:
                chain_head = state
            if chain_tail is not None:
                self.sdfg.add_edge(chain_tail, state, dace.InterstateEdge())
            chain_tail = state

    # Loop bounds come from the last do-method visited above
    k_interval = do_method.k_interval
    if execution_order == ExecutionOrder.Forward_Loop.value:
        initialize_expr = str(k_interval.lower)
        condition_expr = f'k < {k_interval.upper}'
        increment_expr = 'k + 1'
    else:
        initialize_expr = str(k_interval.upper - 1)
        condition_expr = f'k >= {k_interval.lower}'
        increment_expr = 'k - 1'

    print(initialize_expr, condition_expr, increment_expr)

    loop_result = self.sdfg.add_loop(before_state=self.last_state_,
                                     loop_state=chain_head,
                                     loop_end_state=chain_tail,
                                     after_state=None,
                                     loop_var='k',
                                     initialize_expr=initialize_expr,
                                     condition_expr=condition_expr,
                                     increment_expr=increment_expr)
    _, _, after_loop = loop_result
    return after_loop
def Export_parallel(self, multi_stage: MultiStage):
    """Export a multi-stage as a map over ``k`` wrapping a nested SDFG.

    One state containing a ``StencilLib`` node is created per do-method
    inside a fresh nested SDFG. That nested SDFG is placed in a new state of
    ``self.sdfg`` inside a map named ``kmap``, and the new state is connected
    after ``self.last_state_`` when that is not None.

    :param multi_stage: Multi-stage whose stages and do-methods are exported.
    :return: The newly created state of ``self.sdfg``.
    """
    ms_state = self.sdfg.add_state(f'ms_state_{CreateUID()}')
    ms_sdfg = dace.SDFG(f'ms_sdfg_{CreateUID()}')

    previous = None
    for stage in multi_stage.stages:
        for do_method in stage.do_methods:
            field_reads = do_method.ReadIds()
            field_writes = do_method.WriteIds()
            touched_ids = field_reads | field_writes
            global_ids = {
                fid
                for fid in touched_ids if self.id_resolver.IsGlobal(fid)
            }
            local_ids = touched_ids - global_ids

            # Register the fields in the nested and in the top-level SDFG
            self.TryAddArray(ms_sdfg, local_ids)
            # self.TryAddScalar(ms_sdfg, reads & globals)
            self.TryAddArray(self.sdfg, touched_ids - global_ids,
                             transient=True)
            # self.TryAddScalar(self.sdfg, reads & globals)

            halo = ClosedInterval3D(Symbol('halo'), Symbol('halo'),
                                    Symbol('halo'), Symbol('halo'), 0, 0)
            halo -= stage.extents

            shrink_bc = {"btype": "shrink", "halo": halo.to_6_tuple()}
            boundary_conditions = {
                f'{self.Name(fid)}_out': shrink_bc
                for fid in field_writes
            }

            state = ms_sdfg.add_state(str(do_method))
            stencil = StencilLib(
                label=str(do_method),
                shape=[I, J, 1],
                accesses=self.Create_Variable_Access_map(
                    do_method.Reads(), '_in'),  # input fields
                output_fields=self.Create_Variable_Access_map(
                    do_method.Writes(), '_out'),  # output fields
                boundary_conditions=boundary_conditions,
                code=do_method.Code())
            stencil.implementation = 'CPU'
            state.add_node(stencil)

            # Inputs: access node -> stencil
            for fid, acc in do_method.read_memlets.items():
                name = self.Name(fid)
                dims = self.Dimensions(fid)
                subset = ','.join(
                    dim_filter(dims, '0:I', '0:J',
                               HalfOpenIntervalStr(acc.k))) or '0'
                state.add_memlet_path(state.add_read(name),
                                      stencil,
                                      memlet=dace.Memlet(f'{name}[{subset}]'),
                                      dst_conn=name + '_in',
                                      propagate=True)

            # Outputs: stencil -> access node
            for fid, acc in do_method.write_memlets.items():
                name = self.Name(fid)
                dims = self.Dimensions(fid)
                subset = ','.join(
                    dim_filter(dims, '0:I', '0:J',
                               HalfOpenIntervalStr(acc.k))) or '0'
                state.add_memlet_path(stencil,
                                      state.add_write(name),
                                      memlet=dace.Memlet(f'{name}[{subset}]'),
                                      src_conn=name + '_out',
                                      propagate=True)

            # Chain this state after the previously created one
            if previous is not None:
                ms_sdfg.add_edge(previous, state, dace.InterstateEdge())
            previous = state

    read_ids = multi_stage.ReadIds()
    write_ids = multi_stage.WriteIds()
    read_names = set(self.Name(fid) for fid in read_ids)
    write_names = set(self.Name(fid) for fid in write_ids)

    # Every listed symbol is mapped to a symbol of the same name
    mapped_symbols = ('halo', 'I', 'J', 'K', 'IJK_stride_I', 'IJK_stride_J',
                      'IJK_stride_K', 'IJK_total_size', 'IJ_stride_I',
                      'IJ_stride_J', 'IJ_total_size', 'I_total_size',
                      'J_total_size', 'K_total_size')
    symbol_mapping = {sym: dace.symbol(sym) for sym in mapped_symbols}
    nested_sdfg = ms_state.add_nested_sdfg(ms_sdfg, self.sdfg, read_names,
                                           write_names, symbol_mapping)

    # The map range comes from the last do-method visited above
    map_entry, map_exit = ms_state.add_map(
        "kmap", {'k': str(do_method.k_interval)})

    # Input path: read -> map_entry -> nested_sdfg
    for fid, acc in multi_stage.read_memlets.items():
        if fid not in read_ids:
            continue

        name = self.Name(fid)
        dims = self.Dimensions(fid)
        k_range = f'k+{acc.k.lower}:k+{acc.k.upper+1}'
        subset = ','.join(dim_filter(dims, '0:I', '0:J', k_range)) or '0'
        ms_state.add_memlet_path(ms_state.add_read(name),
                                 map_entry,
                                 nested_sdfg,
                                 memlet=dace.Memlet(f'{name}[{subset}]'),
                                 dst_conn=name,
                                 propagate=True)

    if len(read_ids) == 0:
        # Without inputs, an empty memlet from the map entry keeps the
        # nested SDFG inside the map scope.
        ms_state.add_edge(map_entry, None, nested_sdfg, None,
                          dace.memlet.Memlet())

    # Output path: nested_sdfg -> map_exit -> write
    for fid, acc in multi_stage.write_memlets.items():
        if fid not in write_ids:
            continue

        name = self.Name(fid)
        dims = self.Dimensions(fid)
        k_range = f'k+{acc.k.lower}:k+{acc.k.upper+1}'
        subset = ','.join(dim_filter(dims, '0:I', '0:J', k_range)) or '0'
        ms_state.add_memlet_path(nested_sdfg,
                                 map_exit,
                                 ms_state.add_write(name),
                                 memlet=dace.Memlet(f'{name}[{subset}]'),
                                 src_conn=name,
                                 propagate=True)

    if self.last_state_ is not None:
        self.sdfg.add_edge(self.last_state_, ms_state, dace.InterstateEdge())

    return ms_state
def make_sdfg(dtype, name="pipeline_test"):
    """Build an SDFG that pushes array ``a`` through a pipeline scope into ``b``.

    The SDFG consists of three states connected in sequence:

    * ``<name>_pre``: copies host array ``a`` into the transient ``a_device``.
    * ``<name>``: copies ``a_device`` into ``a_stream``, runs a tasklet inside
      a pipeline scope over ``n``, ``k``, ``m`` that reads ``a_stream`` and
      writes ``b_stream``, and copies ``b_stream`` into ``b_device``.
    * ``<name>_post``: copies ``b_device`` back into host array ``b``.

    The tasklet adds 1 to its input while the pipeline's init condition
    holds, 3 while the drain condition holds, and 2 otherwise.

    :param dtype: Element type used for the arrays and the streams.
    :param name: Name of the SDFG; also used as the prefix of the state
                 names and as the name of the pipeline and the tasklet.
    :return: The constructed SDFG.
    """
    n = dace.symbol("N")
    k = dace.symbol("K")
    m = dace.symbol("M")

    sdfg = dace.SDFG(name)

    pre_state = sdfg.add_state(name + "_pre")
    state = sdfg.add_state(name)
    post_state = sdfg.add_state(name + "_post")
    sdfg.add_edge(pre_state, state, dace.InterstateEdge())
    sdfg.add_edge(state, post_state, dace.InterstateEdge())

    _, desc_input_host = sdfg.add_array("a", (n, k, m), dtype)
    _, desc_output_host = sdfg.add_array("b", (n, k, m), dtype)

    # Device-side descriptors are derived from the host ones and placed in
    # FPGA global memory, on bank 0 (input) and bank 1 (output).
    # NOTE(review): copy.copy is shallow; if `location` is a dict object
    # shared with the host descriptor, the bank assignment below would also
    # be visible on the host descriptor -- confirm this is harmless.
    desc_input_device = copy.copy(desc_input_host)
    desc_input_device.storage = dace.StorageType.FPGA_Global
    desc_input_device.location["bank"] = 0
    desc_input_device.transient = True
    desc_output_device = copy.copy(desc_output_host)
    desc_output_device.storage = dace.StorageType.FPGA_Global
    desc_output_device.location["bank"] = 1
    desc_output_device.transient = True
    sdfg.add_datadesc("a_device", desc_input_device)
    sdfg.add_datadesc("b_device", desc_output_device)

    # Host to device
    pre_read = pre_state.add_read("a")
    pre_write = pre_state.add_write("a_device")
    pre_state.add_memlet_path(pre_read,
                              pre_write,
                              memlet=dace.Memlet.simple(
                                  pre_write, "0:N, 0:K, 0:M"))

    # Device to host
    post_read = post_state.add_read("b_device")
    post_write = post_state.add_write("b")
    post_state.add_memlet_path(post_read,
                               post_write,
                               memlet=dace.Memlet.simple(
                                   post_write, "0:N, 0:K, 0:M"))

    # Compute state
    read_memory = state.add_read("a_device")
    write_memory = state.add_write("b_device")

    # Memory streams
    sdfg.add_stream("a_stream",
                    dtype,
                    storage=dace.StorageType.FPGA_Local,
                    transient=True)
    sdfg.add_stream("b_stream",
                    dtype,
                    storage=dace.StorageType.FPGA_Local,
                    transient=True)
    produce_input_stream = state.add_write("a_stream")
    consume_input_stream = state.add_read("a_stream")
    produce_output_stream = state.add_write("b_stream")
    # This node is the source of the copy into b_device, so it is a read of
    # the stream (mirrors consume_input_stream above).
    consume_output_stream = state.add_read("b_stream")

    # Named pipeline_entry/pipeline_exit to avoid shadowing the `exit` builtin.
    pipeline_entry, pipeline_exit = state.add_pipeline(
        name, {
            "n": "0:N",
            "k": "0:K",
            "m": "0:M",
        },
        schedule=dace.ScheduleType.FPGA_Device,
        init_size=k * m,
        init_overlap=True,
        drain_size=k * m,
        drain_overlap=True)

    # The init/drain condition expressions are spliced into the tasklet code
    # so the added constant depends on the current pipeline phase.
    tasklet = state.add_tasklet(
        name, {"_in"}, {"_out"},
        """_out = _in + (1 if {} else (3 if {} else 2))""".format(
            pipeline_entry.pipeline.init_condition(),
            pipeline_entry.pipeline.drain_condition()))

    # Container-to-container copies between arrays and streams
    state.add_memlet_path(read_memory,
                          produce_input_stream,
                          memlet=dace.Memlet.simple(read_memory.data,
                                                    "0:N, 0:K, 0:M",
                                                    other_subset_str="0",
                                                    num_accesses=n * k * m))
    state.add_memlet_path(consume_output_stream,
                          write_memory,
                          memlet=dace.Memlet.simple(write_memory.data,
                                                    "0:N, 0:K, 0:M",
                                                    other_subset_str="0",
                                                    num_accesses=n * k * m))

    # Input stream to buffer
    state.add_memlet_path(consume_input_stream,
                          pipeline_entry,
                          tasklet,
                          dst_conn="_in",
                          memlet=dace.Memlet.simple(
                              consume_input_stream.data, "0",
                              num_accesses=-1))

    # Buffer to output stream
    state.add_memlet_path(tasklet,
                          pipeline_exit,
                          produce_output_stream,
                          src_conn="_out",
                          memlet=dace.Memlet.simple(
                              produce_output_stream.data, "0",
                              num_accesses=-1))

    return sdfg
s2 = sdfg.add_state()

# Arrays
# State s0: a tasklet forwards the single element of 'inp' into 'A'.
inp = s0.add_array('inp', [1], dp.float32)
A = s0.add_array('A', [1], dp.float32)
t = s0.add_tasklet('seta', {'a'}, {'b'}, 'b = a')
s0.add_edge(inp, None, t, 'a', dp.Memlet.from_array(inp.data, inp.desc(sdfg)))
s0.add_edge(t, 'b', A, None, dp.Memlet.from_array(A.data, A.desc(sdfg)))

# State s1: reads 'A' and prints "ok" (reached when A[0] > 3, see edges below).
A = s1.add_array('A', [1], dp.float32)
t = s1.add_tasklet('geta', {'a'}, {}, 'printf("ok %f\\n", a + 1)')
s1.add_edge(A, None, t, 'a', dp.Memlet.from_array(A.data, A.desc(sdfg)))

# State s2: reads 'A' and prints "BAD" (reached when A[0] <= 3).
A = s2.add_array('A', [1], dp.float32)
t = s2.add_tasklet('geta', {'a'}, {}, 'printf("BAD %f\\n", a - 1)')
s2.add_edge(A, None, t, 'a', dp.Memlet.from_array(A.data, A.desc(sdfg)))

# The branch taken after s0 is decided by indexing the array 'A' directly in
# the interstate edge conditions.
sdfg.add_edge(s0, s1, dp.InterstateEdge('A[0] > 3'))
sdfg.add_edge(s0, s2, dp.InterstateEdge('A[0] <= 3'))

if __name__ == '__main__':
    print('Toplevel array usage in interstate edge')
    # Both buffers start at 10; since s0 copies inp into A, the 'A[0] > 3'
    # edge is the one expected to fire.
    # Names avoid shadowing the `input` builtin.
    inp_data = np.full([1], 10, dtype=np.float32)
    out_data = np.full([1], 10, dtype=np.float32)

    sdfg(inp=inp_data, A=out_data)

    # `exit` is an interactive helper injected by the `site` module and is not
    # meant for programs; raising SystemExit has the same effect.
    raise SystemExit(0)