def can_be_applied(self, sdfg: SDFG, subgraph: SubgraphView) -> bool:
    """Check whether fusion, possibly after expansion or tiling, is applicable.

    The checks run in the following order; the first one that succeeds
    makes the method return ``True``:

    1. ``SubgraphFusion`` on ``subgraph`` as given.
    2. Only if ``allow_expansion`` is set and ``MultiExpansion`` is
       applicable: the expansion is applied to a deep copy of ``sdfg`` and
       ``SubgraphFusion`` (and, if ``allow_tiling`` is set,
       ``StencilTiling``) is checked on the corresponding subgraph of that
       copy.
    3. Only if ``allow_tiling`` is set: ``StencilTiling`` on ``subgraph``
       as given.

    :param sdfg: SDFG that contains ``subgraph``.
    :param subgraph: Subgraph view to test.
    :return: ``True`` if any of the checks succeeds, ``False`` otherwise.
    """
    graph = subgraph.graph

    # Plain fusion needs no copy, so it is tried first in every mode.
    fusion = SubgraphFusion()
    fusion.setup_match(subgraph)
    if fusion.can_be_applied(sdfg, subgraph):
        return True

    if self.allow_expansion:
        expansion = MultiExpansion()
        expansion.setup_match(subgraph)
        expansion.permutation_only = not self.expansion_split
        if expansion.can_be_applied(sdfg, subgraph):
            # The expansion is applied to a deep copy, so ``sdfg`` itself is
            # never handed to ``expansion.apply``. Node positions are
            # recorded up front to find the matching nodes in the copy.
            graph_indices = [
                i for i, node in enumerate(graph.nodes()) if node in subgraph
            ]
            sdfg_copy = copy.deepcopy(sdfg)
            graph_copy = sdfg_copy.nodes()[sdfg.nodes().index(graph)]
            subgraph_copy = SubgraphView(
                graph_copy, [graph_copy.nodes()[i] for i in graph_indices])
            expansion.sdfg_id = sdfg_copy.sdfg_id
            expansion.apply(sdfg_copy)

            fusion_on_copy = SubgraphFusion()
            fusion_on_copy.setup_match(subgraph_copy)
            if fusion_on_copy.can_be_applied(sdfg_copy, subgraph_copy):
                return True

            tiling_on_copy = StencilTiling()
            tiling_on_copy.setup_match(subgraph_copy)
            if self.allow_tiling and tiling_on_copy.can_be_applied(
                    sdfg_copy, subgraph_copy):
                return True

    if self.allow_tiling:
        stencil_tiling = StencilTiling()
        stencil_tiling.setup_match(subgraph)
        if stencil_tiling.can_be_applied(sdfg, subgraph):
            return True

    return False
def test_subgraph():
    """Compare the map-fission SDFG against ``expected`` before and after fusion.

    The SDFG is fissioned, compiled and run once; the whole first state is
    then checked for ``SubgraphFusion`` applicability, fused, and the SDFG
    is compiled and run a second time.
    """
    A, expected = config()
    B_init = np.random.rand(2)

    sdfg = mapfission_sdfg()
    sdfg.apply_transformations(MapFission)
    dace.sdfg.propagation.propagate_memlets_sdfg(sdfg)

    # Run after fission only.
    compiled = sdfg.compile()
    B = dcpy(B_init)
    compiled(A=A, B=B)
    del compiled
    assert np.allclose(B, expected)
    sdfg.validate()

    # The subgraph spans every node of the first state.
    first_state = sdfg.nodes()[0]
    whole_state = SubgraphView(first_state, first_state.nodes())
    sf = SubgraphFusion(whole_state)
    assert sf.can_be_applied(sdfg, whole_state)
    fusion(sdfg, first_state, None)

    # Run again on the fused graph.
    compiled_fused = sdfg.compile()
    B = dcpy(B_init)
    compiled_fused(A=A, B=B)
    assert np.allclose(B, expected)
    sdfg.validate()
def _test_quantitatively(sdfg):
    """Check that expanding and fusing the first state keeps outputs C and D.

    The SDFG is run once untouched, then ``MultiExpansion`` and
    ``SubgraphFusion`` are applied to all nodes of its first state and the
    SDFG is run again with fresh C/D buffers.

    :param sdfg: SDFG under test; it is transformed in place.
    """
    state = sdfg.nodes()[0]

    # Six independent random vectors of length N, drawn in the order
    # A, B, C1, C2, D1, D2.
    A, B, C1, C2, D1, D2 = (np.random.rand(N.get()).astype(np.float64)
                            for _ in range(6))

    # Baseline run.
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C1, D=D1, N=N)
    del compiled

    subgraph = SubgraphView(state, list(state.nodes()))

    expansion = MultiExpansion(subgraph)
    assert expansion.can_be_applied(sdfg, subgraph) == True
    expansion.apply(sdfg)

    fuse = SubgraphFusion(subgraph)
    assert fuse.can_be_applied(sdfg, subgraph) == True
    fuse.apply(sdfg)

    # Run on the transformed SDFG.
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C2, D=D2, N=N)

    assert np.allclose(C1, C2)
    assert np.allclose(D1, D2)
def test_quantitatively(sdfg, graph):
    """Check that reduce/map expansion plus fusion keeps all three outputs.

    The SDFG is run once untouched, then ``expand_reduce``, ``expand_maps``
    and ``fusion`` are applied to ``graph`` and the SDFG is validated and
    run again into a second set of output buffers.

    :param sdfg: SDFG under test; it is transformed in place.
    :param graph: Graph whose nodes form the subgraph that is expanded and
                  fused.
    """
    A = np.random.rand(N.get()).astype(np.float64)
    B = np.random.rand(M.get()).astype(np.float64)
    C = np.random.rand(O.get()).astype(np.float64)

    def make_outputs():
        # ``np.empty`` is the documented way to get uninitialized arrays;
        # the shapes match the original ``np.ndarray(...)`` calls.
        # NOTE(review): the buffers are uninitialized, so the comparisons
        # below assume the SDFG writes every element -- confirm.
        return (np.empty((N.get(), M.get()), np.float64),
                np.empty((1, ), np.float64),
                np.empty((N.get(), M.get(), O.get()), np.float64))

    out1_base, out2_base, out3_base = make_outputs()
    out1, out2, out3 = make_outputs()

    # Baseline run.
    csdfg = sdfg.compile()
    csdfg(A=A,
          B=B,
          C=C,
          out1=out1_base,
          out2=out2_base,
          out3=out3_base,
          N=N,
          M=M,
          O=O)
    # Drop the compiled baseline before recompiling, as the sibling tests do.
    del csdfg

    expand_reduce(sdfg, graph)
    expand_maps(sdfg, graph)
    subgraph = SubgraphView(graph, list(graph.nodes()))
    assert SubgraphFusion.can_be_applied(sdfg, subgraph)
    fusion(sdfg, graph)
    sdfg.validate()

    # Run on the transformed SDFG.
    csdfg = sdfg.compile()
    csdfg(A=A, B=B, C=C, out1=out1, out2=out2, out3=out3, N=N, M=M, O=O)

    assert np.allclose(out1, out1_base)
    assert np.allclose(out2, out2_base)
    assert np.allclose(out3, out3_base)
    print('PASS')
def invoke_stencil(tile_size, offset=False, unroll=False, view=False):
    """Run the stencil program untouched, tiled and fused, comparing results.

    :param tile_size: Tile size handed to ``StencilTiling`` (as a 1-tuple).
    :param offset: If true, use ``stencil_offset`` instead of ``stencil``.
    :param unroll: If true, set ``unroll_loops`` on the tiling
                   transformation before applying it.
    :param view: If true, call ``sdfg.view()`` before the baseline run and
                 after tiling.
    """
    A = np.random.rand(N.get()).astype(np.float64)
    B1, B2, B3 = (np.zeros((N.get()), dtype=np.float64) for _ in range(3))

    program = stencil_offset if offset else stencil
    sdfg = program.to_sdfg()
    sdfg.simplify()
    state = sdfg.nodes()[0]

    if view:
        sdfg.view()

    # Step 1: baseline result in B1.
    sdfg.name = 'baseline'
    compiled = sdfg.compile()
    compiled(A=A, B=B1, N=N)
    del compiled

    # Step 2: tile all nodes of the state, result in B2.
    subgraph = SubgraphView(state, list(state.nodes()))
    tiling = StencilTiling()
    tiling.setup_match(subgraph)
    tiling.tile_size = (tile_size, )
    tiling.schedule = dace.dtypes.ScheduleType.Sequential
    assert tiling.can_be_applied(sdfg, subgraph)
    if unroll:
        tiling.unroll_loops = True
    tiling.apply(sdfg)

    if view:
        sdfg.view()

    sdfg.name = 'tiled'
    sdfg.validate()
    compiled = sdfg.compile()
    compiled(A=A, B=B2, N=N)
    del compiled
    assert np.allclose(B1, B2)

    # Step 3: fuse the tiled state with consolidation on, result in B3.
    sdfg.simplify()
    subgraph = SubgraphView(state, list(state.nodes()))
    fuse = SubgraphFusion()
    fuse.setup_match(subgraph)
    assert fuse.can_be_applied(sdfg, subgraph)
    fuse.consolidate = True
    fuse.apply(sdfg)

    sdfg.name = 'fused'
    compiled = sdfg.compile()
    compiled(A=A, B=B3, N=N)
    del compiled

    print(np.linalg.norm(B1))
    print(np.linalg.norm(B3))
    assert np.allclose(B1, B2)
    assert np.allclose(B1, B3)
    print("PASS")
def test_p3():
    """Expect ``SubgraphFusion`` to be rejected for ``disjoint_test_3``.

    The program is converted and simplified, must consist of a single
    state, and the fusion check over all nodes of that state must fail.
    """
    sdfg = disjoint_test_3.to_sdfg()
    sdfg.simplify()

    states = sdfg.nodes()
    only_state = states[0]
    assert len(states) == 1

    whole_state = SubgraphView(only_state, only_state.nodes())
    fuse = SubgraphFusion(whole_state)
    applicable = fuse.can_be_applied(sdfg, whole_state)
    assert not applicable
def test_p2():
    """Expect ``SubgraphFusion`` to be rejected for ``disjoint_test_2``.

    The program is converted, strict transformations are applied, the SDFG
    must consist of a single state, and the fusion check over all nodes of
    that state must fail.
    """
    sdfg = disjoint_test_2.to_sdfg()
    sdfg.apply_strict_transformations()

    states = sdfg.nodes()
    only_state = states[0]
    assert len(states) == 1

    whole_state = SubgraphView(only_state, only_state.nodes())
    fuse = SubgraphFusion(whole_state)
    applicable = fuse.can_be_applied(sdfg, whole_state)
    assert not applicable
def test_p1():
    """Run ``subgraph_fusion_parallel`` before and after expansion and fusion.

    The program is compiled and run once, then ``MultiExpansion`` and
    ``SubgraphFusion`` are applied to all nodes of the first state (each
    must report that it is applicable) and the SDFG is compiled and run
    again with the same arguments. Results are not compared; the test
    passes if every step completes.
    """
    N.set(20)
    M.set(30)
    O.set(50)
    P.set(40)
    Q.set(42)
    R.set(25)

    sdfg = subgraph_fusion_parallel.to_sdfg()
    sdfg.simplify()
    state = sdfg.nodes()[0]

    A = np.random.rand(N.get()).astype(np.float64)
    B = np.random.rand(M.get()).astype(np.float64)
    C = np.random.rand(O.get()).astype(np.float64)
    D = np.random.rand(M.get()).astype(np.float64)
    E = np.random.rand(N.get()).astype(np.float64)
    F = np.random.rand(P.get()).astype(np.float64)
    G = np.random.rand(M.get()).astype(np.float64)
    H = np.random.rand(P.get()).astype(np.float64)
    I = np.random.rand(N.get()).astype(np.float64)
    J = np.random.rand(R.get()).astype(np.float64)
    X = np.random.rand(N.get()).astype(np.float64)
    Y = np.random.rand(M.get()).astype(np.float64)
    Z = np.random.rand(P.get()).astype(np.float64)

    # Both runs receive exactly the same keyword arguments.
    call_args = dict(A=A, B=B, C=C, D=D, E=E, F=F, G=G, H=H, I=I, J=J,
                     X=X, Y=Y, Z=Z, N=N, M=M, O=O, P=P, R=R, Q=Q)

    csdfg = sdfg.compile()
    csdfg(**call_args)
    del csdfg

    subgraph = SubgraphView(state, list(state.nodes()))

    me = MultiExpansion()
    me.setup_match(subgraph)
    assert me.can_be_applied(sdfg, subgraph)
    me.apply(sdfg)

    sf = SubgraphFusion()
    sf.setup_match(subgraph)
    assert sf.can_be_applied(sdfg, subgraph)
    sf.apply(sdfg)

    csdfg = sdfg.compile()
    csdfg(**call_args)
    print("PASS")
def test_offsets_array():
    """Fission then fuse a map over ``10:20`` that goes through a transient array.

    The SDFG reads ``A[i]``, adds one into the one-element transient
    ``interim``, adds two and writes back to ``A[i]``. After ``MapFission``
    and again after fusion, running it must add 3 to ``A[10:]`` and leave
    the remaining elements unchanged.
    """
    sdfg = dace.SDFG('mapfission_offsets2')
    sdfg.add_array('A', [20], dace.float64)
    sdfg.add_array('interim', [1], dace.float64, transient=True)
    state = sdfg.add_state()

    # Nodes: map scope, two tasklets, the transient in between, A in/out.
    map_entry, map_exit = state.add_map('outer', dict(i='10:20'))
    add_one = state.add_tasklet('addone', {'a'}, {'b'}, 'b = a + 1')
    interim_node = state.add_access('interim')
    add_two = state.add_tasklet('addtwo', {'a'}, {'b'}, 'b = a + 2')
    a_in = state.add_read('A')
    a_out = state.add_write('A')

    # Edges: A -> addone -> interim -> addtwo -> A.
    state.add_memlet_path(a_in,
                          map_entry,
                          add_one,
                          dst_conn='a',
                          memlet=dace.Memlet.simple('A', 'i'))
    state.add_edge(add_one, 'b', interim_node, None,
                   dace.Memlet.simple('interim', '0'))
    state.add_edge(interim_node, None, add_two, 'a',
                   dace.Memlet.simple('interim', '0'))
    state.add_memlet_path(add_two,
                          map_exit,
                          a_out,
                          src_conn='b',
                          memlet=dace.Memlet.simple('A', 'i'))

    sdfg.apply_transformations(MapFission)
    dace.propagate_memlets_sdfg(sdfg)
    sdfg.validate()

    # Reference data: only the upper half is touched.
    A = np.random.rand(20)
    expected = A.copy()
    expected[10:] += 3

    # Run after fission.
    A_cpy = A.copy()
    compiled = sdfg.compile()
    compiled(A=A_cpy)
    del compiled
    print(np.linalg.norm(A_cpy))
    print(np.linalg.norm(expected))
    assert np.allclose(A_cpy, expected)

    # Fuse the whole first state and run again.
    first_state = sdfg.nodes()[0]
    whole_state = SubgraphView(first_state, first_state.nodes())
    sf = SubgraphFusion(whole_state)
    assert sf.can_be_applied(sdfg, whole_state)
    fusion(sdfg, first_state, None)

    A_cpy = A.copy()
    compiled = sdfg.compile()
    compiled(A=A_cpy)
    assert np.allclose(A_cpy, expected)
def test_inputs_outputs():
    """
    Fission then fuse a map whose inner tasklets both connect to the outside.

    Tasklet ``t1`` reads ``in1`` and writes ``out2`` directly, while its
    other output feeds ``t2`` through the transient scalar ``tmp``; ``t2``
    also reads ``in2`` and writes ``out1``. Results are checked after
    ``MapFission`` and again after fusion.
    """
    sdfg = dace.SDFG('inputs_outputs_fission')
    sdfg.add_array('in1', [2], dace.float64)
    sdfg.add_array('in2', [2], dace.float64)
    sdfg.add_scalar('tmp', dace.float64, transient=True)
    sdfg.add_array('out1', [2], dace.float64)
    sdfg.add_array('out2', [2], dace.float64)
    state = sdfg.add_state()

    read_in1 = state.add_read('in1')
    read_in2 = state.add_read('in2')
    write_out1 = state.add_write('out1')
    write_out2 = state.add_write('out2')
    map_entry, map_exit = state.add_map('outer', dict(i='0:2'))
    t1 = state.add_tasklet('t1', {'i1'}, {'o1', 'o2'},
                           'o1 = i1 * 2; o2 = i1 * 5')
    t2 = state.add_tasklet('t2', {'i1', 'i2'}, {'o1'}, 'o1 = i1 * i2')

    # Inputs enter through the map entry.
    state.add_memlet_path(read_in1,
                          map_entry,
                          t1,
                          dst_conn='i1',
                          memlet=dace.Memlet.simple('in1', 'i'))
    state.add_memlet_path(read_in2,
                          map_entry,
                          t2,
                          dst_conn='i2',
                          memlet=dace.Memlet.simple('in2', 'i'))
    # t1 hands its first output to t2 via the scalar transient.
    state.add_edge(t1, 'o1', t2, 'i1', dace.Memlet.simple('tmp', '0'))
    # Outputs leave through the map exit.
    state.add_memlet_path(t2,
                          map_exit,
                          write_out1,
                          src_conn='o1',
                          memlet=dace.Memlet.simple('out1', 'i'))
    state.add_memlet_path(t1,
                          map_exit,
                          write_out2,
                          src_conn='o2',
                          memlet=dace.Memlet.simple('out2', 'i'))

    sdfg.apply_transformations(MapFission)
    dace.sdfg.propagation.propagate_memlets_sdfg(sdfg)

    # Reference values.
    A, B, C, D = tuple(np.random.rand(2) for _ in range(4))
    expected_C = (A * 2) * B
    expected_D = A * 5

    # Run after fission.
    compiled = sdfg.compile()
    C_cpy = deepcopy(C)
    D_cpy = deepcopy(D)
    compiled(in1=A, in2=B, out1=C_cpy, out2=D_cpy)
    del compiled
    assert np.allclose(C_cpy, expected_C)
    assert np.allclose(D_cpy, expected_D)

    # Fuse the whole first state and run again.
    first_state = sdfg.nodes()[0]
    whole_state = SubgraphView(first_state, first_state.nodes())
    sf = SubgraphFusion(whole_state)
    assert sf.can_be_applied(sdfg, whole_state)
    fusion(sdfg, first_state, None)

    C_cpy = deepcopy(C)
    D_cpy = deepcopy(D)
    compiled = sdfg.compile()
    compiled(in1=A, in2=B, out1=C_cpy, out2=D_cpy)
    del compiled
    assert np.allclose(C_cpy, expected_C)
    assert np.allclose(D_cpy, expected_D)
def invoke_stencil(tile_size, offset=False, unroll=False):
    """Run the stencil program untouched, tiled and fused, comparing results.

    The three runs write into ``B1`` (baseline), ``B2`` (after
    ``StencilTiling``) and ``B3`` (after ``SubgraphFusion``); the tiled and
    fused results must match the baseline.

    :param tile_size: Tile size handed to ``StencilTiling`` (as a 1-tuple).
    :param offset: If true, use ``stencil_offset`` instead of ``stencil``.
    :param unroll: Value assigned to ``unroll_loops`` on the tiling
                   transformation.
    """
    A = np.random.rand(N.get() * 2).astype(np.float64)
    B1 = np.zeros((N.get()), dtype=np.float64)
    B2 = np.zeros((N.get()), dtype=np.float64)
    B3 = np.zeros((N.get()), dtype=np.float64)

    if offset:
        sdfg = stencil_offset.to_sdfg()
    else:
        sdfg = stencil.to_sdfg()
    sdfg.simplify()
    graph = sdfg.nodes()[0]

    # baseline
    sdfg.name = f'baseline_{tile_size}_{offset}_{unroll}'
    csdfg = sdfg.compile()
    csdfg(A=A, B=B1, N=N)
    del csdfg

    subgraph = SubgraphView(graph, list(graph.nodes()))
    st = StencilTiling()
    st.setup_match(subgraph)
    st.tile_size = (tile_size, )
    st.unroll_loops = unroll
    assert st.can_be_applied(sdfg, subgraph)
    # change schedule so that OMP never fails
    st.schedule = dace.dtypes.ScheduleType.Sequential
    st.apply(sdfg)

    sdfg.name = f'tiled_{tile_size}_{offset}_{unroll}'
    csdfg = sdfg.compile()
    csdfg(A=A, B=B2, N=N)
    del csdfg

    sdfg.simplify()
    subgraph = SubgraphView(graph, list(graph.nodes()))
    sf = SubgraphFusion()
    sf.setup_match(subgraph)
    assert sf.can_be_applied(sdfg, subgraph)
    sf.apply(sdfg)

    sdfg.name = f'fused_{tile_size}_{offset}_{unroll}'
    csdfg = sdfg.compile()
    csdfg(A=A, B=B3, N=N)
    del csdfg

    print(np.linalg.norm(B1))
    print(np.linalg.norm(B3))
    # Without these comparisons the test could never fail on wrong results.
    assert np.allclose(B1, B2)
    assert np.allclose(B1, B3)
    print("PASS")
def apply(self, sdfg):
    """Apply the composite transformation to this transformation's subgraph.

    Steps, in order:

    1. If ``allow_expansion`` is set and ``MultiExpansion`` is applicable,
       apply it.
    2. If ``SubgraphFusion`` is applicable, configure and apply it, store
       its ``_global_map_entry`` on ``self`` and return.
    3. Otherwise, if ``allow_tiling`` is set and ``StencilTiling`` is
       applicable, apply it, rebuild the subgraph from the tiling's outer
       map entries, apply ``SubgraphFusion`` to that subgraph, store its
       ``_global_map_entry`` on ``self`` and return.

    If none of steps 2 and 3 ran, a warning is issued.

    :param sdfg: SDFG to transform in place.
    """
    subgraph = self.subgraph_view(sdfg)
    graph = subgraph.graph

    def run_fusion(fusion):
        # Forward this transformation's settings, apply the fusion and
        # remember the resulting global map entry.
        fusion.debug = self.debug
        fusion.transient_allocation = self.transient_allocation
        fusion.schedule_innermaps = self.schedule_innermaps
        fusion.apply(sdfg)
        self._global_map_entry = fusion._global_map_entry

    if self.allow_expansion:
        expansion = MultiExpansion()
        expansion.setup_match(subgraph, self.sdfg_id, self.state_id)
        expansion.permutation_only = not self.expansion_split
        if expansion.can_be_applied(sdfg, subgraph):
            expansion.apply(sdfg)

    sf = SubgraphFusion()
    sf.setup_match(subgraph, self.sdfg_id, self.state_id)
    if sf.can_be_applied(sdfg, self.subgraph_view(sdfg)):
        run_fusion(sf)
        return

    if self.allow_tiling:
        st = StencilTiling()
        st.setup_match(subgraph, self.sdfg_id, self.state_id)
        if st.can_be_applied(sdfg, self.subgraph_view(sdfg)):
            # set StencilTiling properties
            st.debug = self.debug
            st.unroll_loops = self.stencil_unroll_loops
            st.strides = self.stencil_strides
            st.apply(sdfg)

            # Tiling produced new outer maps; fuse the subgraph built from
            # them. Fusion is applied here without a can_be_applied check.
            new_entries = st._outer_entries
            subgraph = helpers.subgraph_from_maps(sdfg, graph, new_entries)
            sf = SubgraphFusion()
            sf.setup_match(subgraph, self.sdfg_id, self.state_id)
            run_fusion(sf)
            return

    warnings.warn("CompositeFusion::Apply did not perform as expected")
def test_p1():
    """Run ``program`` before and after ``MultiExpansion`` + ``SubgraphFusion``.

    Both transformations are built for all nodes of the first state and
    must report that they are applicable. The SDFG is compiled and run once
    before and once after the transformations with identical arguments;
    results are not compared.
    """
    N.set(20)
    M.set(30)
    O.set(50)
    P.set(40)
    Q.set(42)
    R.set(25)

    sdfg = program.to_sdfg()
    sdfg.apply_strict_transformations()
    state = sdfg.nodes()[0]

    # Array name -> symbol giving its length; one random float64 vector each,
    # created in this order.
    length_of = dict(A=N, B=M, C=O, D=M, E=N, F=P, G=M, H=P, I=N, J=R,
                     X=N, Y=M, Z=P)
    arrays = {
        name: np.random.rand(symbol.get()).astype(np.float64)
        for name, symbol in length_of.items()
    }
    symbols = dict(N=N, M=M, O=O, P=P, R=R, Q=Q)

    # First run on the untransformed SDFG.
    compiled = sdfg.compile()
    compiled(**arrays, **symbols)
    del compiled

    subgraph = SubgraphView(state, list(state.nodes()))
    expansion = MultiExpansion(subgraph)
    fusion = SubgraphFusion(subgraph)

    assert MultiExpansion.can_be_applied(sdfg, subgraph)
    expansion.apply(sdfg)

    assert SubgraphFusion.can_be_applied(sdfg, subgraph)
    fusion.apply(sdfg)

    # Second run on the transformed SDFG.
    compiled = sdfg.compile()
    compiled(**arrays, **symbols)
    print("PASS")
def _test_quantitatively(sdfg, graph):
    """Check that fusing ``graph`` leaves output ``C`` unchanged.

    The SDFG is validated and run once, ``SubgraphFusion`` must be
    applicable to all nodes of ``graph``, ``fusion`` is applied and the
    SDFG is run again into a second zero-initialized buffer.

    :param sdfg: SDFG under test; it is transformed in place.
    :param graph: Graph whose nodes form the subgraph to fuse.
    """
    A = np.random.rand(N.get(), M.get(), O.get()).astype(np.float64)
    B = np.random.rand(N.get(), M.get(), O.get()).astype(np.float64)
    C1, C2 = (np.zeros([N.get(), M.get(), O.get()], dtype=np.float64)
              for _ in range(2))

    # Baseline run into C1.
    sdfg.validate()
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C1, N=N, M=M, O=O)
    del compiled

    all_nodes = SubgraphView(graph, graph.nodes())
    fuse_check = SubgraphFusion(all_nodes)
    assert fuse_check.can_be_applied(sdfg, all_nodes)
    fusion(sdfg, graph)

    # Fused run into C2.
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C2, N=N, M=M, O=O)
    del compiled

    assert np.allclose(C1, C2)
    print('PASS')
def test_p1():
    """Check that fusing ``disjoint_test_1`` keeps its in-place result on ``A``.

    The simplified SDFG must have a single state. It is run once on a copy
    of ``A``, ``SubgraphFusion`` is applied to all nodes of that state, and
    the fused SDFG is run on a second copy; both copies must agree.
    """
    sdfg = disjoint_test_1.to_sdfg()
    sdfg.simplify()

    states = sdfg.nodes()
    only_state = states[0]
    assert len(states) == 1

    A = np.random.rand(M.get(), 2).astype(np.float64)
    before_fusion = A.copy()
    after_fusion = A.copy()

    compiled = sdfg.compile()
    compiled(A=before_fusion, N=N, M=M)
    del compiled

    whole_state = SubgraphView(only_state, only_state.nodes())
    fuse = SubgraphFusion(whole_state)
    assert fuse.can_be_applied(sdfg, whole_state)
    fuse.apply(sdfg)

    compiled = sdfg.compile()
    # NOTE(review): unlike the first call, N is not passed here -- confirm
    # which of the two argument lists is the intended one.
    compiled(A=after_fusion, M=M)
    del compiled

    assert np.allclose(before_fusion, after_fusion)