def generate_node(self, sdfg: sdfg.SDFG, dfg: state.StateSubgraphView, state_id: int, node: nodes.Node,
                  function_stream: prettycode.CodeIOStream, callsite_stream: prettycode.CodeIOStream):
    """
    Generates code for a single tasklet node.

    The work is split in three steps: (1) input copies into the tasklet,
    (2) output definitions leaving the tasklet, and (3) the tasklet itself.

    :param sdfg: The SDFG, forwarded to the dispatcher and the unparser.
    :param dfg: The dataflow graph whose edges around ``node`` are visited.
    :param state_id: State identifier, forwarded unchanged.
    :param node: The node to generate; must be a ``nodes.Tasklet``.
    :param function_stream: Code stream, forwarded unchanged.
    :param callsite_stream: Code stream, forwarded unchanged.
    :raises RuntimeError: If ``node`` is not a tasklet.
    """
    # Only tasklets are supported by this generator
    if not isinstance(node, nodes.Tasklet):
        raise RuntimeError(
            "Only tasklets are handled here, not {}. This should have been filtered by the predicate".format(
                type(node)))

    # (1) in->tasklet: dispatch a copy for every incoming edge, starting
    # from the array node that feeds it
    for in_edge in dfg.in_edges(node):
        source_array = find_input_arraynode(dfg, in_edge)
        self.dispatcher.dispatch_copy(source_array, node, in_edge, sdfg, dfg, state_id, function_stream,
                                      callsite_stream)

    # (2) tasklet->out: dispatch an output definition for every outgoing
    # edge, towards the array node that receives it
    for out_edge in dfg.out_edges(node):
        target_array = find_output_arraynode(dfg, out_edge)
        self.dispatcher.dispatch_output_definition(node, target_array, out_edge, sdfg, dfg, state_id,
                                                   function_stream, callsite_stream)

    # (3) the tasklet code itself
    self.unparse_tasklet(sdfg, dfg, state_id, node, function_stream, callsite_stream)
def weakly_connected_component(dfg, node_in_component: Node) -> StateSubgraphView:
    """
    Returns a subgraph of all nodes that form the weakly connected component in `dfg` that contains
    `node_in_component`.

    Weak connectivity ignores edge direction, so every visited node is expanded through both its
    successors and its predecessors. Following only successors and then only predecessors of the
    starting node would miss nodes that are reachable solely by alternating directions
    (e.g., ``C`` in ``A -> B <- C`` when starting from ``A``).

    :param dfg: The graph to search; must provide ``successors(node)`` and ``predecessors(node)``.
    :param node_in_component: A node that identifies the component of interest.
    :return: A subgraph view on `dfg` that contains every node of the component.
    """
    seen = set()
    to_search = [node_in_component]
    while to_search:
        node = to_search.pop()
        if node in seen:
            continue
        seen.add(node)
        # Traverse edges in both directions: direction does not matter for weak connectivity
        to_search.extend(dfg.successors(node))
        to_search.extend(dfg.predecessors(node))
    return StateSubgraphView(dfg, seen)
def _extend_subgraph_with_access_nodes(state: SDFGState, subgraph: StateSubgraphView) -> StateSubgraphView:
    """
    Grows a subgraph view by following memlet paths so that the input/output
    access nodes it needs are part of the view.

    :param state: The state that contains the subgraph.
    :param subgraph: The initial subgraph view to extend.
    :return: A new subgraph view on ``state`` with the collected nodes.
    :raises ValueError: If a scope entry (or exit) node ends up in the result
                        without its matching exit (or entry) node.
    """
    parent_sdfg = state.parent
    collected: List[nd.Node] = copy.copy(subgraph.nodes())
    pending: Deque[nd.Node] = deque(subgraph.nodes())

    # Walk memlet paths outwards from every node that is still pending
    while pending:
        current = pending.pop()

        # Views are replaced by the node returned by get_view_node and not
        # expanded any further themselves
        if isinstance(current, nd.AccessNode) and isinstance(current.desc(parent_sdfg), data.View):
            viewed = sdutil.get_view_node(state, current)
            collected.append(viewed)
            pending.append(viewed)
            continue

        for in_edge in state.in_edges(current):
            # Special case: IN_* connectors are not traversed further
            if isinstance(in_edge.dst, (nd.EntryNode, nd.ExitNode)) and in_edge.dst_conn.startswith('IN_'):
                continue
            path = state.memlet_path(in_edge)
            fresh_sources = [hop.src for hop in path if hop.src not in collected]
            collected.extend(fresh_sources)
            # A memlet path may end in a code node, so keep expanding from the new nodes
            pending.extend(fresh_sources)

        for out_edge in state.out_edges(current):
            # Special case: OUT_* connectors are not traversed further
            if isinstance(out_edge.src, (nd.EntryNode, nd.ExitNode)) and out_edge.src_conn.startswith('OUT_'):
                continue
            path = state.memlet_path(out_edge)
            fresh_targets = [hop.dst for hop in path if hop.dst not in collected]
            collected.extend(fresh_targets)
            # A memlet path may end in a code node, so keep expanding from the new nodes
            pending.extend(fresh_targets)

    # Every scope node in the result must come with its counterpart
    for member in collected:
        missing = None
        if isinstance(member, nd.EntryNode):
            scope_exit = state.exit_node(member)
            if scope_exit not in collected:
                missing = scope_exit
        if isinstance(member, nd.ExitNode):
            scope_entry = state.entry_node(member)
            if scope_entry not in collected:
                missing = scope_entry
        if missing is not None:
            raise ValueError(f'Cutout cannot expand graph implicitly since "{member}" is in the graph and "{missing}" is '
                             'not. Please provide more nodes in the subgraph as necessary.')

    return StateSubgraphView(state, collected)
def _containers_defined_outside(sdfg: SDFG, state: SDFGState, subgraph: StateSubgraphView) -> Set[str]:
    """
    Collects the names of data containers that are set outside the given subgraph.

    :param sdfg: The SDFG whose states are scanned.
    :param state: The state in which the subgraph resides.
    :param subgraph: The subgraph under consideration.
    :return: A set with the names of the matching containers.
    """
    # Only writes matter here, so it suffices to inspect access nodes; interstate
    # edges are not considered
    outside: Set[str] = set()
    for other_state in sdfg.nodes():
        for access in other_state.data_nodes():
            # Skip access nodes that belong to the subgraph itself
            if other_state is state and access in subgraph.nodes():
                continue
            # An incoming edge is treated as a write to the container
            if other_state.in_degree(access) > 0:
                outside.add(access.data)

    # Access nodes that are sinks in the subgraph but have outgoing edges in the
    # full state count as well
    for access in subgraph.data_nodes():
        becomes_sink = subgraph.out_degree(access) == 0 and state.out_degree(access) > 0
        if becomes_sink:
            outside.add(access.data)

    return outside
def cutout_state(state: SDFGState, *nodes: nd.Node, make_copy: bool = True) -> SDFG:
    """
    Cut out a subgraph of a state from an SDFG to run separately for localized testing or optimization.
    The subgraph defined by the list of nodes will be extended to include access nodes of data containers
    necessary to run the graph separately. In addition, all transient data containers created outside the
    cut out graph will become global.

    :param state: The SDFG state in which the subgraph resides.
    :param nodes: The nodes in the subgraph to cut out.
    :param make_copy: If True, deep-copies every SDFG element in the copy. Otherwise, original references are kept.
    :return: A new SDFG with a single state that contains the extended subgraph.
    """

    def _same_object(element):
        # Used when no copy is requested: the element itself is reused
        return element

    create_element = copy.deepcopy if make_copy else _same_object

    sdfg = state.parent
    subgraph: StateSubgraphView = _extend_subgraph_with_access_nodes(state, StateSubgraphView(state, nodes))
    other_arrays = _containers_defined_outside(sdfg, state, subgraph)

    # New SDFG that carries over the constants, the used symbols and the data containers
    new_sdfg = SDFG(f'{state.parent.name}_cutout', sdfg.constants_prop)
    defined_syms = subgraph.defined_symbols()
    for sym in subgraph.free_symbols:
        new_sdfg.add_symbol(sym, defined_syms[sym])

    for dnode in subgraph.data_nodes():
        name = dnode.data
        if name in new_sdfg.arrays:
            continue
        descriptor = sdfg.arrays[name].clone()
        # A transient that is defined outside of the cutout becomes a global
        if name in other_arrays:
            descriptor.transient = False
        new_sdfg.add_datadesc(name, descriptor)

    # A single state holds the extended subgraph
    new_state = new_sdfg.add_state(state.label, is_start_state=True)
    inserted_nodes: Dict[nd.Node, nd.Node] = {}
    for edge in subgraph.edges():
        for endpoint in (edge.src, edge.dst):
            if endpoint not in inserted_nodes:
                inserted_nodes[endpoint] = create_element(endpoint)
        new_state.add_edge(inserted_nodes[edge.src], edge.src_conn, inserted_nodes[edge.dst], edge.dst_conn,
                           create_element(edge.data))

    # Nodes without any edge in the subgraph have not been inserted yet
    for isolated in subgraph.nodes():
        if isolated not in inserted_nodes:
            inserted_nodes[isolated] = create_element(isolated)
            new_state.add_node(inserted_nodes[isolated])

    # Drop connectors that no edge of the new state uses.
    # NOTE(review): with make_copy=False these are the original node objects, so the
    # removal presumably affects the original graph as well - confirm this is intended.
    for new_node in inserted_nodes.values():
        used_in = {e.dst_conn for e in new_state.in_edges(new_node)}
        for conn in (new_node.in_connectors.keys() - used_in):
            new_node.remove_in_connector(conn)
        used_out = {e.src_conn for e in new_state.out_edges(new_node)}
        for conn in (new_node.out_connectors.keys() - used_out):
            new_node.remove_out_connector(conn)

    return new_sdfg
def separate_maps(state, dfg, schedule):
    """
    Splits the given ScopeSubgraphView into consecutive subgraphs with and
    without maps of the given schedule type.

    The given view is assumed not to contain any concurrent segments (i.e.,
    pass it through concurrent_subgraphs first). Only top-level maps are
    considered: if the desired schedule occurs inside another (undesired)
    map, it is ignored.

    :param state: The state used to build the resulting subgraph views.
    :param dfg: The subgraph view to separate.
    :param schedule: The map schedule to separate by.
    :return: A list of subgraph views in the order of the original DFG:
             scope subgraphs for the parts with matching maps and
             StateSubgraphViews for the parts in between.
    """
    from dace import nodes
    from dace.sdfg.scope import StateSubgraphView

    first_source = dfg.source_nodes()[0]
    ordered = list(dfs_topological_sort(dfg, first_source))
    # The first source and first sink of the view are never part of a segment
    skipped = [first_source, dfg.sink_nodes()[0]]

    segments = []
    pending = []
    for node in ordered:
        if node in skipped:
            continue

        if not isinstance(node, nodes.MapEntry):
            pending.append(node)
            continue

        if node.map.schedule == schedule:
            # Close the segment collected so far, then emit the map scope on its own
            segments.append(StateSubgraphView(state, pending))
            map_scope = state.scope_subgraph(node)
            segments.append(map_scope)
            skipped.extend(map_scope.nodes())
            # Start a fresh list: the view created above keeps the old one
            pending = []
        else:
            # A map with another schedule is absorbed into the current segment as a whole
            scope_nodes = state.scope_subgraph(node).nodes()
            skipped.extend(scope_nodes)
            pending.extend(scope_nodes)

    if pending:
        segments.append(StateSubgraphView(state, pending))

    return segments
def test_internal_outarray():
    """
    Nests a tasklet and the access node it writes to, both located inside a
    map, and checks that running the SDFG still sets ``A[1]`` to zero.
    """
    sdfg = dace.SDFG('internal_outarr')
    sdfg.add_array('A', [20], dace.float64)
    state = sdfg.add_state()

    # Map with a single iteration around a tasklet that writes to A
    map_entry, map_exit = state.add_map('_', {'i': '0:1'})
    tasklet = state.add_tasklet('doit', {}, {'a'}, 'a = 0')
    write_node = state.add_write('A')

    state.add_nedge(map_entry, tasklet, dace.Memlet())
    state.add_edge(tasklet, 'a', write_node, None, dace.Memlet('A[1]'))
    state.add_nedge(write_node, map_exit, dace.Memlet())

    # Nest only the tasklet and its output access node
    to_nest = StateSubgraphView(state, [tasklet, write_node])
    nest_state_subgraph(sdfg, state, to_nest)

    a = np.random.rand(20)
    sdfg(A=a)
    assert a[1] == 0
def copy_memory(self, sdfg: dace.SDFG, dfg: StateSubgraphView, state_id: int, src_node: nodes.Node,
                dst_node: nodes.Node, edge: MultiConnectorEdge, function_stream: CodeIOStream,
                callsite_stream: CodeIOStream):
    """
    Emits the code for a copy that involves a tensor core fragment.

    If one endpoint is not an access node (access<->tasklet), only a C++
    reference to the data is emitted, as the fragment contains the memory.
    Otherwise a ``wmma::load_matrix_sync`` or ``wmma::store_matrix_sync``
    call is written, depending on whether the destination storage is a
    tensor core storage type.

    :param sdfg: The SDFG used to look up data descriptors.
    :param dfg: The graph used to resolve the memlet path of ``edge``.
    :param state_id: State identifier passed on to the code stream.
    :param src_node: Source node of the copy.
    :param dst_node: Destination node of the copy.
    :param edge: The edge that carries the copied memlet.
    :param function_stream: Unused by this method.
    :param callsite_stream: The code stream that receives the generated code.
    """
    endpoints = [src_node, dst_node]

    # Tasklet -> Array: the source has no descriptor, bind the output
    # connector name to the destination data
    src_desc = None
    if isinstance(src_node, nodes.AccessNode):
        src_desc = src_node.desc(sdfg)
    if not src_desc:
        connector = dfg.memlet_path(edge)[0].src_conn
        callsite_stream.write('auto& %s = %s;' % (connector, dst_node.data), sdfg, state_id, endpoints)
        return

    # Array -> Tasklet: the destination has no descriptor, bind the input
    # connector name to the source data
    dst_desc = None
    if isinstance(dst_node, nodes.AccessNode):
        dst_desc = dst_node.desc(sdfg)
    if not dst_desc:
        connector = dfg.memlet_path(edge)[-1].dst_conn
        callsite_stream.write('auto& %s = %s;' % (connector, src_node.data), sdfg, state_id, endpoints)
        return

    # Pick the endpoint that is not a tensor core fragment
    if 'TensorCore' in src_desc.storage.name:
        nontc_desc, nontc_node = dst_desc, dst_node
    else:
        nontc_desc, nontc_node = src_desc, src_node

    # Majority is detected by the strides of the data
    row_major = bool(nontc_desc.strides[-1] == 1)
    stride_index = 0 if row_major else 1

    # C++ expression of the non-tensor-core side, based on the memlet
    memlet = edge.data
    if memlet.data == nontc_node.data:
        other_expr = cpp_array_expr(sdfg, memlet)
    elif memlet.other_subset is not None:
        offset_cppstr = cpp_offset_expr(nontc_desc, memlet.other_subset)
        other_expr = '%s[%s]' % (nontc_node.data, offset_cppstr)
    else:
        other_expr = '%s[0]' % nontc_node.data

    # Emit copy code
    if 'TensorCore' in dst_desc.storage.name:
        # GPU memory to Tensor Cores
        code = ('wmma::load_matrix_sync({tc}, &{other}, '
                '{stride});'.format(tc=dst_node.data, other=other_expr, stride=src_desc.strides[stride_index]))
    else:
        # Tensor Cores to GPU memory
        code = ('wmma::store_matrix_sync(&{other}, {tc}, '
                '{stride}, wmma::mem_{maj}_major);'.format(tc=src_node.data,
                                                           other=other_expr,
                                                           maj='row' if row_major else 'col',
                                                           stride=dst_desc.strides[stride_index]))
    callsite_stream.write(code, sdfg, state_id, endpoints)