def dispatch_subgraph(self, sdfg, dfg, state_id, function_stream, callsite_stream, skip_entry_node=False):
    """
    Dispatches code generation for the nodes of a scope subgraph of an
    `SDFGState`, visiting them in topological order.

    Map entries are handed to ``dispatch_scope`` together with their whole
    scope subgraph; every other node is handed to ``dispatch_node``.

    :param sdfg: The SDFG that owns the state.
    :param dfg: The scope subgraph to traverse.
    :param state_id: Identifier used with ``sdfg.node`` to obtain the state.
    :param function_stream: Forwarded unchanged to the dispatched generators.
    :param callsite_stream: Forwarded unchanged to the dispatched generators.
    :param skip_entry_node: If True, the (single) source node of ``dfg`` is
                            not dispatched.
    """
    # Traversal starts from the nodes that have no predecessor in `dfg`
    sources = [n for n in dfg.nodes() if not list(dfg.predecessors(n))]

    # Nodes that must not be dispatched individually
    already_handled = set()
    if skip_entry_node:
        assert len(sources) == 1
        already_handled.add(sources[0])

    for node in dfs_topological_sort(dfg, sources):
        if node in already_handled:
            continue

        if not isinstance(node, nodes.MapEntry):
            self.dispatch_node(sdfg, dfg, state_id, node, function_stream, callsite_stream)
            continue

        # A map scope is generated as one unit; its members are then skipped
        inner_scope = sdfg.node(state_id).scope_subgraph(node)
        self.dispatch_scope(node.map.schedule, sdfg, inner_scope, state_id, function_stream, callsite_stream)
        already_handled.update(inner_scope.nodes())
def _detect_constraints(self):
    """
    Seeds the inference graph with scalar/vector constraints for every
    tasklet connector in the subgraph, using two rules:

    * A memlet that carries vector data (contains the loop param) forces
      the connector to be a Vector.
    * A memlet that carries scalar data (no loop param) and whose memlet
      path starts/ends at an Array access node forces the connector to be
      a Scalar.
    """

    def is_array_access(endpoint):
        # True only for access nodes whose descriptor is an Array
        return (isinstance(endpoint, nodes.AccessNode)
                and isinstance(endpoint.desc(self.sdfg), data.Array))

    for node in dfs_topological_sort(self.subgraph):
        if not isinstance(node, nodes.Tasklet):
            continue

        # Input connectors: inspect where each incoming memlet path begins
        for edge in self.state.in_edges(node):
            key = (node, edge.dst_conn, True)
            if self._carries_vector_data(edge):
                self.conn_to_node[key].infer_as(InferenceNode.Vector)
            elif self._carries_scalar_data(edge):
                origin = self.state.memlet_path(edge)[0].src
                if is_array_access(origin):
                    self.conn_to_node[key].infer_as(InferenceNode.Scalar)

        # Output connectors: inspect where each outgoing memlet path ends
        for edge in self.state.out_edges(node):
            key = (node, edge.src_conn, False)
            if self._carries_vector_data(edge):
                self.conn_to_node[key].infer_as(InferenceNode.Vector)
            elif self._carries_scalar_data(edge):
                target = self.state.memlet_path(edge)[-1].dst
                if is_array_access(target):
                    self.conn_to_node[key].infer_as(InferenceNode.Scalar)
def infer_connector_types(sdfg: SDFG,
                          state: SDFGState = None,
                          graph: SubgraphView = None,
                          inferred: TypeInferenceDict = None):
    """
    Infers connector types without modifying any connector (read-only).
    The result maps ``(node, connector name, is_input)`` tuples to the
    inferred type; use `apply_connector_types` to write them back.

    The scope of the inference depends on the arguments given:

        * only `sdfg`: every state of the SDFG
        * `sdfg` and `state`: that single state
        * `sdfg`, `state` and `graph`: the given subgraph of that state

    :param sdfg: The SDFG to infer.
    :param state: The state to infer.
    :param graph: The graph to infer.
    :param inferred: The dictionary of already inferred types; a new one is
                     created when omitted.
    :return: The dictionary of inferred types (``inferred`` itself if it
             was provided).
    :raises ValueError: If `sdfg` is None, or if `graph` is given without
                        `state`.
    """
    if inferred is None:
        inferred = TypeInferenceDict()

    if sdfg is None:
        raise ValueError('No SDFG was provided')
    if state is None and graph is not None:
        # A subgraph is meaningless without the state it belongs to
        raise ValueError('Missing some arguments')

    def traversal_targets():
        # Yields (owning state, graph to walk) pairs, lazily
        if state is None:
            for each_state in sdfg.nodes():
                yield each_state, each_state
        elif graph is None:
            yield state, state
        else:
            yield state, graph

    for owner, scope in traversal_targets():
        for node in dfs_topological_sort(scope):
            infer_node_connectors(sdfg, owner, node, inferred)

    return inferred
def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
    """
    Checks whether the matched guard/begin states form a for-loop.

    The candidate is accepted only if all of the following hold:

        * the guard has exactly two incoming and two outgoing edges;
        * both incoming edges assign exactly one variable, and it is the
          same variable on both;
        * the outgoing edges carry no assignments and their conditions are
          negations of each other;
        * every state reached from the loop's first state (without passing
          the guard) is dominated by the guard;
        * at least one of those states has an edge back to the guard.

    :param graph: The graph in which the candidate was matched.
    :param candidate: Mapping from pattern nodes to node identifiers.
    :param expr_index: Not used by this check.
    :param sdfg: The SDFG used for dominator and reachability analysis.
    :param strict: Not used by this check.
    :return: True if the candidate is a for-loop, False otherwise.
    """
    guard = graph.node(candidate[DetectLoop._loop_guard])
    begin = graph.node(candidate[DetectLoop._loop_begin])

    # Exactly two edges in (init and increment) ...
    incoming = graph.in_edges(guard)
    if len(incoming) != 2:
        return False

    # ... and exactly two edges out (loop and exit-loop)
    outgoing = graph.out_edges(guard)
    if len(outgoing) != 2:
        return False

    # Each incoming edge sets a single variable, shared by both edges
    if any(len(edge.data.assignments) != 1 for edge in incoming):
        return False
    itervar = next(iter(incoming[0].data.assignments.keys()))
    if itervar not in incoming[1].data.assignments:
        return False

    # Outgoing edges are assignment-free and mutually exclusive
    if not all(len(edge.data.assignments) == 0 for edge in outgoing):
        return False
    first_condition = outgoing[0].data.condition_sympy()
    negated_second = sp.Not(outgoing[1].data.condition_sympy())
    if first_condition != negated_second:
        return False

    idom = nx.dominance.immediate_dominators(sdfg.nx, sdfg.start_state)

    def passes_through_guard(state):
        # Climb the dominator tree; reaching the root (a node that is its
        # own immediate dominator) without meeting the guard means the
        # state is not inside the loop.
        cursor = state
        while cursor != idom[cursor]:
            if cursor == guard:
                return True
            cursor = idom[cursor]
        return False

    body_states = sdutil.dfs_topological_sort(
        sdfg, sources=[begin], condition=lambda _, child: child != guard)

    backedge_found = False
    for state in body_states:
        if any(edge.dst == guard for edge in graph.out_edges(state)):
            backedge_found = True
        if not passes_through_guard(state):
            return False

    return backedge_found
def iterate_over_passes(self) -> Iterator[Pass]:
    """
    Yields the passes of the pipeline in dependency order, possibly
    yielding a pass more than once when elements it depends on were
    modified since it last ran.

    After each yielded pass resumes the generator, ``self._modified`` is
    read to learn what that pass changed. Subclasses may override this
    method to change the pass order.

    :return: An iterator over the passes to apply, in order.
    """
    # The dependency graph is built on first use only
    if self._depgraph is None:
        self._depgraph = self._make_dependency_graph()

    # For every pass applied so far: everything that has been modified
    # since that pass last ran. An entry starts out as Nothing and
    # accumulates the modifications of each subsequently applied pass.
    modified_since: Dict[Pass, Modifies] = {}

    def pending(candidate: Pass):
        """ Yields ``candidate`` preceded by its dependencies, skipping up-to-date passes. """
        up_to_date = (candidate in modified_since
                      and not candidate.should_reapply(modified_since[candidate]))
        if up_to_date:
            return

        # Dependencies come first
        for prerequisite in self._depgraph.predecessors(candidate):
            yield from pending(prerequisite)
        yield candidate

    for current in sdutil.dfs_topological_sort(self._depgraph):
        current: Pass
        for to_run in pending(current):
            # Clear the modification record, let the caller run the pass,
            # then fold whatever it changed into every earlier pass
            self._modified = Modifies.Nothing
            yield to_run

            for earlier in modified_since:
                modified_since[earlier] |= self._modified
            modified_since[to_run] = Modifies.Nothing
def infer_connector_types(sdfg: SDFG):
    """
    Infers connector types throughout an SDFG in-place.

    Each state is traversed in topological order. For every node, untyped
    input connectors are resolved first, then the node's own
    ``infer_connector_types`` is called, and finally untyped output
    connectors are resolved from the outgoing memlets.

    :param sdfg: The SDFG to infer.
    :raises TypeError: If a connector type is ambiguous or cannot be
                       inferred.
    """

    def allocated_as_scalar(memlet):
        # Memlets without data always count; otherwise the container must
        # not live in GPU global storage
        if memlet.data is None:
            return True
        return sdfg.arrays[memlet.data].storage is not dtypes.StorageType.GPU_Global

    for state in sdfg.nodes():
        for node in dfs_topological_sort(state):
            # ---- Inputs: use the node type or the incoming edges ----
            for edge in state.in_edges(node):
                conn = edge.dst_conn
                if conn is None:
                    continue

                memlet = edge.data
                is_scalar = (memlet.subset and memlet.subset.num_elements() == 1)
                scalar_alloc = allocated_as_scalar(memlet)

                if node.in_connectors[conn].type is not None:
                    continue

                if isinstance(node, nodes.NestedSDFG):
                    # Nested SDFG: rely on the internal array of the same name
                    inner_desc = node.sdfg.arrays[conn]
                    is_scalar = (isinstance(inner_desc, data.Scalar) and scalar_alloc)
                    base_type = inner_desc.dtype
                    conn_type = (base_type if is_scalar else dtypes.pointer(base_type))
                elif memlet.data is not None:
                    # Type comes from the memlet
                    first_edge = state.memlet_path(edge)[0]
                    if first_edge.src_conn is not None:
                        conn_type = first_edge.src.out_connectors[first_edge.src_conn]
                    else:
                        outer_desc = sdfg.arrays[memlet.data]
                        is_scalar |= isinstance(outer_desc, data.Scalar)
                        if isinstance(node, nodes.LibraryNode):
                            is_scalar &= scalar_alloc
                        base_type = outer_desc.dtype
                        conn_type = (base_type if is_scalar else dtypes.pointer(base_type))
                else:
                    # Code->Code: copy the type of the producing connector
                    first_edge = state.memlet_path(edge)[0]
                    producer_type = first_edge.src.out_connectors[first_edge.src_conn]
                    if producer_type.type is None:
                        raise TypeError('Ambiguous or uninferable type in'
                                        ' connector "%s" of node "%s"' % (producer_type, first_edge.src))
                    conn_type = producer_type

                node.in_connectors[conn] = conn_type

            # ---- The node may now infer further output types itself ----
            node.infer_connector_types(sdfg, state)

            # ---- Outputs: use the outgoing edges ----
            for edge in state.out_edges(node):
                conn = edge.src_conn
                if conn is None:
                    continue

                memlet = edge.data
                is_scalar = (memlet.subset and memlet.subset.num_elements() == 1
                             and (not memlet.dynamic or (memlet.dynamic and memlet.wcr is not None)))
                scalar_alloc = allocated_as_scalar(memlet)

                if node.out_connectors[conn].type is not None:
                    continue

                if isinstance(node, nodes.NestedSDFG):
                    # Nested SDFG: rely on the internal array of the same name
                    inner_desc = node.sdfg.arrays[conn]
                    is_scalar = (isinstance(inner_desc, data.Scalar) and scalar_alloc)
                    base_type = inner_desc.dtype
                    conn_type = (base_type if is_scalar else dtypes.pointer(base_type))
                elif memlet.data is None:
                    # Nothing to infer from; the final check below decides
                    continue
                else:
                    # Type comes from the memlet
                    outer_desc = sdfg.arrays[memlet.data]
                    is_scalar |= isinstance(outer_desc, data.Scalar)
                    if isinstance(node, nodes.LibraryNode):
                        is_scalar &= scalar_alloc
                    base_type = outer_desc.dtype
                    conn_type = (base_type if is_scalar else dtypes.pointer(base_type))

                node.out_connectors[conn] = conn_type

            # ---- Any output connector still untyped is an error ----
            for edge in state.out_edges(node):
                conn = edge.src_conn
                if conn and node.out_connectors[conn].type is None:
                    raise TypeError('Ambiguous or uninferable type in'
                                    ' connector "%s" of node "%s"' % (conn, node))
def _build(self):
    """
    Builds the vector inference graph in two sweeps over the subgraph.

    The first sweep creates an inference node for every non-pointer
    tasklet connector and for every Scalar access node, and links tasklet
    inputs to the outputs they contribute to. The second sweep adds edges
    that follow the dataflow between tasklets and access nodes.

    :raises VectorInferenceException: If the subgraph contains a node that
                                      is neither a Tasklet nor an
                                      AccessNode.
    """

    def register(key):
        # Creates the inference node for `key`, records it, adds it to the graph
        created = InferenceNode(key)
        self.conn_to_node[key] = created
        self.add_node(created)
        return created

    # Sweep 1: create all necessary nodes
    for node in dfs_topological_sort(self.subgraph):
        if isinstance(node, nodes.Tasklet):
            # Pointer connectors take no part in the inference
            value_inputs = [
                name for name in node.in_connectors
                if not isinstance(self.inf[(node, name, True)], dtypes.pointer)
            ]
            value_outputs = [
                name for name in node.out_connectors
                if not isinstance(self.inf[(node, name, False)], dtypes.pointer)
            ]

            in_nodes = {name: register((node, name, True)) for name in value_inputs}
            out_nodes = {name: register((node, name, False)) for name in value_outputs}

            # Connect the inputs of every union to its corresponding output
            for out_name, in_names in self._get_output_subsets(node).items():
                for in_name in in_names:
                    self.add_edge(in_nodes[in_name],
                                  out_nodes[out_name],
                                  mode=VectorInferenceGraph.Propagate_Default)
        elif isinstance(node, nodes.AccessNode):
            # Only Scalar access nodes get a node (they can get a vector dtype)
            if isinstance(node.desc(self.sdfg), data.Scalar):
                register(node)
        else:
            # Some other node occurs in the graph, not supported
            raise VectorInferenceException(
                'Only Tasklets and AccessNodes are supported')

    # Sweep 2: create edges based on connectors
    for node in dfs_topological_sort(self.subgraph):
        if isinstance(node, nodes.Tasklet):
            dst_is_tasklet = True
        elif isinstance(node, nodes.AccessNode):
            dst_is_tasklet = False
        else:
            continue

        for edge in self.state.in_edges(node):
            producer = edge.src
            if isinstance(producer, nodes.Tasklet):
                src_key = (producer, edge.src_conn, False)
            elif isinstance(producer, nodes.AccessNode):
                # TODO: What does an AccessNode -> AccessNode edge mean here?
                src_key = producer
            else:
                continue

            dst_key = (node, edge.dst_conn, True) if dst_is_tasklet else node
            self._try_add_edge(self.conn_to_node[src_key],
                               self.conn_to_node[dst_key],
                               self._get_propagation_mode(edge))
def apply_pass(
        self, sdfg: SDFG,
        pipeline_results: Dict[str, Any]) -> Optional[Dict[SDFGState, Set[str]]]:
    """
    Removes unreachable dataflow throughout SDFG states.

    States are visited in reverse state order. In each state, nodes are
    tested for deadness in reverse topological order, scopes around the
    dead nodes are kept connected with empty memlets, and the dead nodes
    are then removed together with the memlet paths leading into them.
    Access nodes left without any edge are removed as well.

    As a side effect, the entry of each modified state in
    ``pipeline_results['AccessSets']`` is overwritten with an updated read
    set, and nested-SDFG data whose output connector was removed is marked
    transient.

    :param sdfg: The SDFG to modify.
    :param pipeline_results: If in the context of a ``Pipeline``, a dictionary that is populated with prior Pass
                             results as ``{Pass subclass name: returned object from pass}``. If not run in a
                             pipeline, an empty dictionary is expected. The entries ``'StateReachability'`` and
                             ``'AccessSets'`` are read unconditionally.
    :return: A dictionary mapping states to removed data descriptor names, or None if nothing changed.
    """
    # Depends on the following analysis passes:
    #  * State reachability
    #  * Read/write access sets per state
    reachable: Dict[SDFGState, Set[SDFGState]] = pipeline_results['StateReachability']
    access_sets: Dict[SDFGState, Tuple[Set[str], Set[str]]] = pipeline_results['AccessSets']
    # NOTE(review): annotated as Set[str], but the code below stores the
    # removed node objects themselves (``dead_nodes`` and isolated access
    # nodes), not data names -- confirm which one callers expect.
    result: Dict[SDFGState, Set[str]] = defaultdict(set)

    # Traverse SDFG backwards
    for state in reversed(list(cfg.stateorder_topological_sort(sdfg))):
        #############################################
        # Analysis
        #############################################

        # Compute states where memory will no longer be read:
        # index 1 of an access set is used as the write set, index 0 as the read set
        writes = access_sets[state][1]
        descendants = reachable[state]
        descendant_reads = set().union(*(access_sets[succ][0] for succ in descendants))
        no_longer_used: Set[str] = set(data for data in writes if data not in descendant_reads)

        # Compute dead nodes
        dead_nodes: List[nodes.Node] = []

        # Propagate deadness backwards within a state; the growing list is
        # passed to the check so that earlier verdicts can be taken into account
        for node in sdutil.dfs_topological_sort(state, reverse=True):
            if self._is_node_dead(node, sdfg, state, dead_nodes, no_longer_used):
                dead_nodes.append(node)

        # Scope exit nodes are only dead if their corresponding entry nodes are
        live_nodes = set()
        for node in dead_nodes:
            if isinstance(node, nodes.ExitNode) and state.entry_node(node) not in dead_nodes:
                live_nodes.add(node)
        dead_nodes = dtypes.deduplicate([n for n in dead_nodes if n not in live_nodes])

        # Nothing to remove in this state
        if not dead_nodes:
            continue

        # Remove nodes while preserving scopes
        scopes_to_reconnect: Set[nodes.Node] = set()
        for node in state.nodes():
            # Look for scope exits that will be disconnected
            if isinstance(node, nodes.ExitNode) and node not in dead_nodes:
                if any(n in dead_nodes for n in state.predecessors(node)):
                    scopes_to_reconnect.add(node)

        # Two types of scope disconnections may occur:
        # 1. Two scope exits will no longer be connected
        # 2. A predecessor of dead nodes is in a scope and not connected to its exit
        # Case (1) is taken care of by ``remove_memlet_path``
        # Case (2) is handled below
        # Reconnect scopes
        if scopes_to_reconnect:
            schildren = state.scope_children()
            for exit_node in scopes_to_reconnect:
                entry_node = state.entry_node(exit_node)
                for node in schildren[entry_node]:
                    if node is exit_node:
                        continue
                    # For a nested scope, its exit node is the one to reconnect
                    if isinstance(node, nodes.EntryNode):
                        node = state.exit_node(node)
                    # If node will be disconnected from exit node, add an empty memlet
                    if all(succ in dead_nodes for succ in state.successors(node)):
                        state.add_nedge(node, exit_node, Memlet())

        #############################################
        # Removal
        #############################################
        # Output connectors of nested SDFGs that fed removed memlet paths
        predecessor_nsdfgs: Dict[nodes.NestedSDFG, Set[str]] = defaultdict(set)
        for node in dead_nodes:
            # Remove memlet paths and connectors pertaining to dead nodes
            for e in state.in_edges(node):
                mtree = state.memlet_tree(e)
                for leaf in mtree.leaves():
                    # Keep track of predecessors of removed nodes for connector pruning
                    if isinstance(leaf.src, nodes.NestedSDFG):
                        predecessor_nsdfgs[leaf.src].add(leaf.src_conn)
                    state.remove_memlet_path(leaf)

            # Remove the node itself as necessary
            state.remove_node(node)

        result[state].update(dead_nodes)

        # Remove isolated access nodes after elimination
        access_nodes = set(state.data_nodes())
        for node in access_nodes:
            if state.degree(node) == 0:
                state.remove_node(node)
                result[state].add(node)

        # Prune now-dead connectors
        for node, dead_conns in predecessor_nsdfgs.items():
            for conn in dead_conns:
                # If removed connector belonged to a nested SDFG, and no other input connector shares name,
                # make nested data transient (dead dataflow elimination would remove internally as necessary)
                if conn not in node.in_connectors:
                    node.sdfg.arrays[conn].transient = True

        # Update read sets for the predecessor states to reuse: only access
        # nodes that survived and still have outgoing edges count as reads
        access_nodes -= result[state]
        access_node_names = set(n.data for n in access_nodes if state.out_degree(n) > 0)
        access_sets[state] = (access_node_names, access_sets[state][1])

    # An empty result means that no state was changed
    return result or None
def apply(self, sdfg):
    """
    Fully unrolls the matched for-loop by replacing it with one copy of
    the loop states per iteration.

    For every value of the iteration variable, the loop states are copied,
    the iteration variable is replaced by its value in the copied states
    and in the conditions of the copied inter-state edges, and consecutive
    iterations are chained with unconditional edges. The guard and the
    original loop states are removed afterwards.

    If the evaluated range contains no iteration, the state preceding the
    loop is connected directly to the state following it, so that the
    latter stays reachable once the guard is removed.

    :param sdfg: The SDFG containing the loop; modified in place.
    :raises ValueError: If ``self.count`` is 0 and the loop range is
                        symbolic.
    :raises NotImplementedError: If ``self.count`` is not 0 (partial
                                 unrolling is not implemented).
    """
    # Obtain loop information
    guard: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_guard])
    begin: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_begin])
    after_state: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._exit_state])

    # Obtain iteration variable, range, and stride
    guard_inedges = sdfg.in_edges(guard)
    condition_edge = sdfg.edges_between(guard, begin)[0]
    itervar = list(guard_inedges[0].data.assignments.keys())[0]
    condition = condition_edge.data.condition_sympy()
    rng = LoopUnroll._loop_range(itervar, guard_inedges, condition)

    # Loop must be unrollable
    if self.count == 0 and any(symbolic.issymbolic(r, sdfg.constants) for r in rng):
        raise ValueError('Loop cannot be fully unrolled, size is symbolic')
    if self.count != 0:
        raise NotImplementedError  # TODO(later)

    # Find the state prior to the loop: the incoming guard edge whose
    # assignment equals the range start is the initialization edge, the
    # other one is the increment edge coming from the last loop state
    if rng[0] == symbolic.pystr_to_symbolic(guard_inedges[0].data.assignments[itervar]):
        before_state: sd.SDFGState = guard_inedges[0].src
        last_state: sd.SDFGState = guard_inedges[1].src
    else:
        before_state: sd.SDFGState = guard_inedges[1].src
        last_state: sd.SDFGState = guard_inedges[0].src

    # Get loop states (everything reachable from `begin` without passing the guard)
    loop_states = list(
        sdutil.dfs_topological_sort(sdfg, sources=[begin], condition=lambda _, child: child != guard))
    first_id = loop_states.index(begin)
    last_id = loop_states.index(last_state)
    loop_subgraph = gr.SubgraphView(sdfg, loop_states)

    # Evaluate the real values of the loop
    start, end, stride = (symbolic.evaluate(r, sdfg.constants) for r in rng)

    # Create states for loop subgraph; each entry is the (first, last)
    # state pair of one unrolled iteration.
    # NOTE(review): `end + 1` treats `end` as inclusive for an increasing
    # loop; verify against `_loop_range` how decreasing loops (negative
    # stride) are expected to behave here.
    unrolled_states = []
    for i in range(start, end + 1, stride):
        # Using to/from JSON copies faster than deepcopy (which will also
        # copy the parent SDFG)
        new_states = [sd.SDFGState.from_json(s.to_json(), context={'sdfg': sdfg}) for s in loop_states]

        # Replace iterate with value in each state
        for state in new_states:
            state.set_label(state.label + '_%s_%d' % (itervar, i))
            state.replace(itervar, i)

        # Add subgraph to original SDFG
        for edge in loop_subgraph.edges():
            src = new_states[loop_states.index(edge.src)]
            dst = new_states[loop_states.index(edge.dst)]

            # Replace conditions in subgraph edges
            data: sd.InterstateEdge = copy.deepcopy(edge.data)
            if data.condition:
                ASTFindReplace({itervar: str(i)}).visit(data.condition)

            sdfg.add_edge(src, dst, data)

        # Connect iterations with unconditional edges
        if unrolled_states:
            sdfg.add_edge(unrolled_states[-1][1], new_states[first_id], sd.InterstateEdge())

        unrolled_states.append((new_states[first_id], new_states[last_id]))

    if unrolled_states:
        # Connect new states to before and after states without conditions
        sdfg.add_edge(before_state, unrolled_states[0][0], sd.InterstateEdge())
        sdfg.add_edge(unrolled_states[-1][1], after_state, sd.InterstateEdge())
    else:
        # The loop runs zero times: bypass it entirely. Without this edge,
        # removing the guard below would leave `after_state` unreachable.
        sdfg.add_edge(before_state, after_state, sd.InterstateEdge())

    # Remove old states from SDFG
    sdfg.remove_nodes_from([guard] + loop_states)