def test_quantitatively(sdfg, graph):
    """Run the SDFG before and after reduce/map expansion plus subgraph
    fusion and verify all three outputs are numerically unchanged.

    :param sdfg: the SDFG under test (provides arrays A, B, C and out1..out3).
    :param graph: the SDFG state to transform.
    """
    A = np.random.rand(N.get()).astype(np.float64)
    B = np.random.rand(M.get()).astype(np.float64)
    C = np.random.rand(O.get()).astype(np.float64)
    # Baseline (pre-transformation) outputs vs. post-transformation outputs.
    # Fix: use an explicit one-element tuple (1,) — the original '(1)' is
    # just the integer 1 in parentheses (same effect here, but misleading).
    out1_base = np.ndarray((N.get(), M.get()), np.float64)
    out2_base = np.ndarray((1,), np.float64)
    out3_base = np.ndarray((N.get(), M.get(), O.get()), np.float64)
    out1 = np.ndarray((N.get(), M.get()), np.float64)
    out2 = np.ndarray((1,), np.float64)
    out3 = np.ndarray((N.get(), M.get(), O.get()), np.float64)

    csdfg = sdfg.compile()
    csdfg(A=A, B=B, C=C, out1=out1_base, out2=out2_base, out3=out3_base, N=N, M=M, O=O)

    expand_reduce(sdfg, graph)
    expand_maps(sdfg, graph)
    # Fix: graph.nodes() is already a list — no need to copy it with a
    # comprehension (ruff PERF402).
    subgraph = SubgraphView(graph, graph.nodes())
    # Fix: assert truthiness directly instead of comparing '== True'.
    assert SubgraphFusion.match(sdfg, subgraph)
    fusion(sdfg, graph)
    sdfg.validate()

    csdfg = sdfg.compile()
    csdfg(A=A, B=B, C=C, out1=out1, out2=out2, out3=out3, N=N, M=M, O=O)
    assert np.allclose(out1, out1_base)
    assert np.allclose(out2, out2_base)
    assert np.allclose(out3, out3_base)
    print('PASS')
def test_p3(in_transient, out_transient):
    """Compare reduction_test_3 outputs after each transformation stage
    (reduce expansion, then map expansion + fusion) with the baseline.

    :param in_transient: forwarded to expand_reduce's create_in_transient.
    :param out_transient: forwarded to expand_reduce's create_out_transient.
    """
    sdfg = reduction_test_3.to_sdfg()
    sdfg.apply_strict_transformations()
    state = sdfg.nodes()[0]

    A = np.random.rand(M.get(), N.get()).astype(np.float64)
    B = np.random.rand(M.get(), N.get()).astype(np.float64)
    C1 = np.zeros([N.get()], dtype=np.float64)
    C2 = np.zeros([N.get()], dtype=np.float64)
    C3 = np.zeros([N.get()], dtype=np.float64)

    def run(out):
        # Compile fresh each time; the compiled object is dropped on return,
        # matching the original's explicit 'del csdfg'.
        compiled = sdfg.compile()
        compiled(A=A, B=B, C=out, N=N, M=M)

    run(C1)  # baseline
    expand_reduce(sdfg, state, create_in_transient=in_transient, create_out_transient=out_transient)
    run(C2)
    expand_maps(sdfg, state)
    fusion(sdfg, state)
    run(C3)

    # Guard against a trivially-zero baseline masking a broken comparison.
    assert np.linalg.norm(C1) > 0.01
    assert np.allclose(C1, C2)
    assert np.allclose(C1, C3)
def test_subgraph():
    """MapFission followed by SubgraphFusion must leave the result of the
    mapfission SDFG unchanged."""
    A, expected = config()
    B_init = np.random.rand(2)

    graph = mapfission_sdfg()
    graph.apply_transformations(MapFission)
    dace.sdfg.propagation.propagate_memlets_sdfg(graph)

    # Baseline run after fission only.
    compiled = graph.compile()
    B = dcpy(B_init)
    compiled(A=A, B=B)
    del compiled
    assert np.allclose(B, expected)
    graph.validate()

    # Fuse the whole first state and re-run.
    state = graph.nodes()[0]
    subgraph = SubgraphView(state, state.nodes())
    sf = SubgraphFusion(subgraph)
    assert sf.can_be_applied(graph, subgraph)
    fusion(graph, state, None)

    compiled = graph.compile()
    B = dcpy(B_init)
    compiled(A=A, B=B)
    assert np.allclose(B, expected)
    graph.validate()
def test_quantitatively(sdfg, graph):
    """Verify that fusing the given state does not change outputs B and C.

    :param sdfg: the SDFG under test.
    :param graph: the SDFG state to fuse.
    """
    A = np.random.rand(M.get(), N.get()).astype(np.float64)
    B_before = np.zeros(shape=[M.get(), N.get()], dtype=np.float64)
    C_before = np.zeros(shape=[M.get(), N.get()], dtype=np.float64)
    B_after = np.zeros(shape=[M.get(), N.get()], dtype=np.float64)
    C_after = np.zeros(shape=[M.get(), N.get()], dtype=np.float64)

    # Baseline run on the untransformed SDFG.
    compiled = sdfg.compile()
    compiled(A=A, B=B_before, C=C_before, N=N, M=M)

    # Fuse and run again with the same input.
    fusion(sdfg, graph)
    compiled = sdfg.compile()
    compiled(A=A, B=B_after, C=C_after, N=N, M=M)

    assert np.allclose(B_before, B_after)
    assert np.allclose(C_before, C_after)
def test_inputs_outputs():
    """ Test subgraphs where the computation modules that are in the middle
        connect to the outside. """
    # Build an SDFG with one outer map containing two chained tasklets; the
    # intermediate tasklet t1 also writes directly to an external output.
    sdfg = dace.SDFG('inputs_outputs_fission')
    sdfg.add_array('in1', [2], dace.float64)
    sdfg.add_array('in2', [2], dace.float64)
    sdfg.add_scalar('tmp', dace.float64, transient=True)  # t1 -> t2 conduit
    sdfg.add_array('out1', [2], dace.float64)
    sdfg.add_array('out2', [2], dace.float64)
    state = sdfg.add_state()
    in1 = state.add_read('in1')
    in2 = state.add_read('in2')
    out1 = state.add_write('out1')
    out2 = state.add_write('out2')
    me, mx = state.add_map('outer', dict(i='0:2'))
    # t1 feeds both t2 (via 'tmp') and the external array out2.
    t1 = state.add_tasklet('t1', {'i1'}, {'o1', 'o2'}, 'o1 = i1 * 2; o2 = i1 * 5')
    t2 = state.add_tasklet('t2', {'i1', 'i2'}, {'o1'}, 'o1 = i1 * i2')
    state.add_memlet_path(in1, me, t1, dst_conn='i1', memlet=dace.Memlet.simple('in1', 'i'))
    state.add_memlet_path(in2, me, t2, dst_conn='i2', memlet=dace.Memlet.simple('in2', 'i'))
    state.add_edge(t1, 'o1', t2, 'i1', dace.Memlet.simple('tmp', '0'))
    state.add_memlet_path(t2, mx, out1, src_conn='o1', memlet=dace.Memlet.simple('out1', 'i'))
    state.add_memlet_path(t1, mx, out2, src_conn='o2', memlet=dace.Memlet.simple('out2', 'i'))
    # Fission the outer map, then re-propagate memlets over the new structure.
    sdfg.apply_transformations(MapFission)
    dace.sdfg.propagation.propagate_memlets_sdfg(sdfg)
    # Test
    A, B, C, D = tuple(np.random.rand(2) for _ in range(4))
    # Expected values follow from the tasklet code above:
    # out1 = (in1 * 2) * in2, out2 = in1 * 5.
    expected_C = (A * 2) * B
    expected_D = A * 5
    csdfg = sdfg.compile()
    # Copy outputs so the originals can be reused for the post-fusion run.
    C_cpy = deepcopy(C)
    D_cpy = deepcopy(D)
    csdfg(in1=A, in2=B, out1=C_cpy, out2=D_cpy)
    del csdfg
    assert np.allclose(C_cpy, expected_C)
    assert np.allclose(D_cpy, expected_D)
    # Fuse the (fissioned) first state and verify results are unchanged.
    subgraph = SubgraphView(sdfg.nodes()[0], sdfg.nodes()[0].nodes())
    sf = SubgraphFusion(subgraph)
    assert sf.can_be_applied(sdfg, subgraph)
    fusion(sdfg, sdfg.nodes()[0], None)
    C_cpy = deepcopy(C)
    D_cpy = deepcopy(D)
    csdfg = sdfg.compile()
    csdfg(in1=A, in2=B, out1=C_cpy, out2=D_cpy)
    del csdfg
    assert np.allclose(C_cpy, expected_C)
    assert np.allclose(D_cpy, expected_D)
def test_offsets_array():
    """MapFission then SubgraphFusion on a map with a non-zero range offset
    and an array-typed interim; result must equal A with A[10:] += 3."""
    sdfg = dace.SDFG('mapfission_offsets2')
    sdfg.add_array('A', [20], dace.float64)
    sdfg.add_array('interim', [1], dace.float64, transient=True)
    state = sdfg.add_state()
    map_entry, map_exit = state.add_map('outer', dict(i='10:20'))

    add_one = state.add_tasklet('addone', {'a'}, {'b'}, 'b = a + 1')
    interim = state.add_access('interim')
    add_two = state.add_tasklet('addtwo', {'a'}, {'b'}, 'b = a + 2')

    read_a = state.add_read('A')
    write_a = state.add_write('A')
    state.add_memlet_path(read_a, map_entry, add_one, dst_conn='a', memlet=dace.Memlet.simple('A', 'i'))
    state.add_edge(add_one, 'b', interim, None, dace.Memlet.simple('interim', '0'))
    state.add_edge(interim, None, add_two, 'a', dace.Memlet.simple('interim', '0'))
    state.add_memlet_path(add_two, map_exit, write_a, src_conn='b', memlet=dace.Memlet.simple('A', 'i'))

    sdfg.apply_transformations(MapFission)
    dace.propagate_memlets_sdfg(sdfg)
    sdfg.validate()

    # Reference: the mapped range [10, 20) gets +1 then +2.
    A = np.random.rand(20)
    expected = A.copy()
    expected[10:] += 3

    A_cpy = A.copy()
    csdfg = sdfg.compile()
    csdfg(A=A_cpy)
    del csdfg
    print(np.linalg.norm(A_cpy))
    print(np.linalg.norm(expected))
    assert (np.allclose(A_cpy, expected))

    # Fuse the fissioned state and re-check.
    subgraph = SubgraphView(sdfg.nodes()[0], sdfg.nodes()[0].nodes())
    sf = SubgraphFusion(subgraph)
    assert sf.can_be_applied(sdfg, subgraph)
    fusion(sdfg, sdfg.nodes()[0], None)

    A_cpy = A.copy()
    csdfg = sdfg.compile()
    csdfg(A=A_cpy)
    assert (np.allclose(A_cpy, expected))
def test_quantitatively(sdfg, graph):
    """Check that fusing the state preserves the 3D output C.

    :param sdfg: the SDFG under test.
    :param graph: the SDFG state to fuse.
    """
    shape = [N.get(), M.get(), O.get()]
    A = np.random.rand(*shape).astype(np.float64)
    B = np.random.rand(*shape).astype(np.float64)
    C_ref = np.zeros(shape, dtype=np.float64)
    C_fused = np.zeros(shape, dtype=np.float64)
    sdfg.validate()

    # Baseline run.
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C_ref, N=N, M=M, O=O)

    # Fused run.
    fusion(sdfg, graph)
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C_fused, N=N, M=M, O=O)

    assert np.allclose(C_ref, C_fused)
    print('PASS')
def test_offsets():
    """MapFission then fusion on an offset-range map with a scalar interim;
    result must equal A with A[10:] += 3."""
    sdfg = dace.SDFG('mapfission_offsets')
    sdfg.add_array('A', [20], dace.float64)
    sdfg.add_scalar('interim', dace.float64, transient=True)
    state = sdfg.add_state()
    map_entry, map_exit = state.add_map('outer', dict(i='10:20'))

    add_one = state.add_tasklet('addone', {'a'}, {'b'}, 'b = a + 1')
    add_two = state.add_tasklet('addtwo', {'a'}, {'b'}, 'b = a + 2')

    read_a = state.add_read('A')
    write_a = state.add_write('A')
    state.add_memlet_path(read_a, map_entry, add_one, dst_conn='a', memlet=dace.Memlet.simple('A', 'i'))
    state.add_edge(add_one, 'b', add_two, 'a', dace.Memlet.simple('interim', '0'))
    state.add_memlet_path(add_two, map_exit, write_a, src_conn='b', memlet=dace.Memlet.simple('A', 'i'))

    sdfg.apply_transformations(MapFission)
    dace.propagate_memlets_sdfg(sdfg)
    sdfg.validate()

    # Reference: the mapped range [10, 20) gets +1 then +2.
    A = np.random.rand(20)
    A_cpy = A.copy()
    expected = A.copy()
    expected[10:] += 3

    csdfg = sdfg.compile()
    csdfg(A=A_cpy)
    del csdfg
    assert (np.allclose(A_cpy, expected))

    # Fuse and re-check.
    fusion(sdfg, sdfg.nodes()[0], None)
    csdfg = sdfg.compile()
    A_cpy = A.copy()
    csdfg(A=A_cpy)
    assert (np.allclose(A_cpy, expected))
def test_sequential():
    """Fusing the sequential test program must not change output C."""
    N.set(1000)
    sdfg = test_program.to_sdfg()
    state = sdfg.nodes()[0]

    A = np.random.rand(N.get()).astype(np.float64)
    B = np.random.rand(N.get()).astype(np.float64)
    C_ref = np.random.rand(N.get()).astype(np.float64)
    C_fused = np.random.rand(N.get()).astype(np.float64)

    # Baseline run, then fuse and run again on identical inputs.
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C_ref, N=N)
    fusion(sdfg, state)
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C_fused, N=N)

    assert np.allclose(C_ref, C_fused)
def test_2fuse():
    """Two-part softmax: expand reduces and maps on a partition subgraph,
    fuse, and compare against the untransformed result."""
    sdfg = softmax.to_sdfg()
    # Fix: use the public 'name' property instead of poking the private
    # '_name' attribute (consistent with the other softmax test).
    sdfg.name = 'softmax_2part'
    sdfg.apply_strict_transformations()
    X_in = np.random.rand(H.get(), B.get(), SN.get(), SM.get()).astype(np.float32)

    # Baseline result.
    csdfg = sdfg.compile()
    res1 = csdfg(X_in=X_in, H=H, B=B, SN=SN, SM=SM)

    # Transform only the partition returned by get_partition.
    subgraph = get_partition(sdfg, sdfg.nodes()[0])
    expand_reduce(sdfg, sdfg.nodes()[0], subgraph)
    expand_maps(sdfg, sdfg.nodes()[0], subgraph)
    fusion(sdfg, sdfg.nodes()[0], subgraph)

    csdfg = sdfg.compile()
    res2 = csdfg(X_in=X_in, H=H, B=B, SN=SN, SM=SM)
    assert np.allclose(res1, res2)
    print("PASS")
def test():
    """Fuse the GPU-transformed program and verify C is unchanged."""
    N.set(50)
    sdfg = program.to_sdfg()
    sdfg.apply_gpu_transformations()
    state = sdfg.nodes()[0]

    A = np.random.rand(N.get()).astype(np.float64)
    C_ref = np.random.rand(N.get()).astype(np.float64)
    C_fused = np.random.rand(N.get()).astype(np.float64)

    # Baseline run.
    compiled = sdfg.compile()
    compiled(A=A, C=C_ref, N=N)
    del compiled

    # Fused run.
    fusion(sdfg, state)
    compiled = sdfg.compile()
    compiled(A=A, C=C_fused, N=N)

    print(np.linalg.norm(C_ref))
    print(np.linalg.norm(C_fused))
    assert np.allclose(C_ref, C_fused)
def _test_quantitatively(sdfg, graph):
    """Assert SubgraphFusion applicability, fuse, and verify the 3D
    output C is numerically unchanged.

    :param sdfg: the SDFG under test.
    :param graph: the SDFG state to fuse.
    """
    shape = [N.get(), M.get(), O.get()]
    A = np.random.rand(*shape).astype(np.float64)
    B = np.random.rand(*shape).astype(np.float64)
    C_ref = np.zeros(shape, dtype=np.float64)
    C_fused = np.zeros(shape, dtype=np.float64)
    sdfg.validate()

    # Baseline run.
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C_ref, N=N, M=M, O=O)
    del compiled

    # The whole state must be fusible before we apply the transformation.
    subgraph = SubgraphView(graph, graph.nodes())
    sf = SubgraphFusion(subgraph)
    assert sf.can_be_applied(sdfg, subgraph)
    fusion(sdfg, graph)

    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C_fused, N=N, M=M, O=O)
    del compiled

    assert np.allclose(C_ref, C_fused)
    print('PASS')
def test_1fuse():
    """Fully expand and fuse the softmax SDFG; result must match baseline."""
    sdfg = softmax.to_sdfg()
    sdfg.name = 'softmax_fused'
    sdfg.simplify()
    X_in = np.random.rand(H.get(), B.get(), SN.get(), SM.get()).astype(np.float32)

    # Baseline result on the untransformed SDFG.
    compiled = sdfg.compile()
    res_before = compiled(X_in=X_in, H=H, B=B, SN=SN, SM=SM)
    del compiled

    state = sdfg.nodes()[0]
    expand_reduce(sdfg, state)
    expand_maps(sdfg, state)
    fusion(sdfg, state)

    compiled = sdfg.compile()
    res_after = compiled(X_in=X_in, H=H, B=B, SN=SN, SM=SM)
    del compiled

    print(np.linalg.norm(res_before))
    print(np.linalg.norm(res_after))
    assert np.allclose(res_before, res_after)
    print("PASS")
def test_1fuse():
    """Fully expand and fuse the softmax SDFG and verify the result matches
    the untransformed baseline."""
    sdfg = softmax.to_sdfg()
    # Fix: set the name through the public property instead of the private
    # '_name' attribute (consistent with the simplify-based variant).
    sdfg.name = 'softmax_fused'
    sdfg.apply_strict_transformations()
    X_in = np.random.rand(H.get(), B.get(), SN.get(), SM.get()).astype(np.float32)

    # Baseline result.
    csdfg = sdfg.compile()
    res1 = csdfg(X_in=X_in, H=H, B=B, SN=SN, SM=SM)

    expand_reduce(sdfg, sdfg.nodes()[0])
    expand_maps(sdfg, sdfg.nodes()[0])
    fusion(sdfg, sdfg.nodes()[0])

    csdfg = sdfg.compile()
    res2 = csdfg(X_in=X_in, H=H, B=B, SN=SN, SM=SM)
    print(np.linalg.norm(res1))
    print(np.linalg.norm(res2))
    assert np.allclose(res1, res2)
    print("PASS")
def test_qualitatively(sdfg, graph):
    """Smoke test: apply subgraph fusion and check the SDFG still validates.

    :param sdfg: the SDFG under test.
    :param graph: the SDFG state to fuse.
    """
    fusion(sdfg, graph)
    sdfg.validate()
    print("PASS")
out1 >> B[i] out1 = in1 + 1 for i in dace.map[0:N]: with dace.tasklet: in1 << B[i] out1 >> C[i] out1 = in1 + 1 if __name__ == "__main__": N.set(50) sdfg = test_program.to_sdfg() sdfg.apply_gpu_transformations() state = sdfg.nodes()[0] A = np.random.rand(N.get()).astype(np.float64) C1 = np.random.rand(N.get()).astype(np.float64) C2 = np.random.rand(N.get()).astype(np.float64) csdfg = sdfg.compile() csdfg(A=A, C=C1, N=N) fusion(sdfg, state) csdfg = sdfg.compile() csdfg(A=A, C=C2, N=N) print(np.linalg.norm(C1)) print(np.linalg.norm(C2)) assert np.allclose(C1, C2)
def test_qualitatively(sdfg, graph):
    """Smoke test: expand reduces and maps, fuse, and check the SDFG
    still validates (no numerical comparison).

    :param sdfg: the SDFG under test.
    :param graph: the SDFG state to transform.
    """
    expand_reduce(sdfg, graph)
    expand_maps(sdfg, graph)
    fusion(sdfg, graph)
    sdfg.validate()
    print("PASS")