def test_find_contiguous_subsets():
    """Check that four adjoining ranges are merged into one range.

    The expected result spans ``i - 2 .. i`` in the first dimension and
    ``j .. j + 3`` in the second one.
    """
    # (begin0, end0, begin1, end1) for every input range; the step is always 1
    bounds = [
        (i, i, j, j),
        (i, i, j + 3, j + 3),
        (i, i, j + 1, j + 2),
        (i - 2, i - 1, j, j + 3),
    ]
    subset_list = [
        Range([(b0, e0, 1), (b1, e1, 1)]) for b0, e0, b1, e1 in bounds
    ]

    merged = helpers.find_contiguous_subsets(subset_list)
    expected = Range([(i - 2, i, 1), (j, j + 3, 1)])

    assert len(merged) == 1
    assert list(merged)[0] == expected
def test_find_contiguous_subsets_nonsquare():
    """Check merging of ranges whose union is not a single rectangle.

    Merging along dimension 0 first and then over the remaining dimensions
    must leave 2 subsets; merging along dimension 1 first must leave 3
    subsets, and a follow-up pass must not reduce that number.
    """
    # (begin0, end0, begin1, end1) for every input range; the step is always 1
    bounds = [
        (i, i, j, j),
        (i, i, j + 3, j + 3),
        (i, i, j + 1, j + 2),
        (i + 2, i + 2, j, j),
        (i + 2, i + 2, j + 3, j + 3),
        (i + 2, i + 2, j + 1, j + 2),
        (i + 1, i + 1, j - 1, j - 1),
        (i + 1, i + 1, j, j),
        (i + 1, i + 1, j + 1, j + 1),
    ]
    subset_list = [
        Range([(b0, e0, 1), (b1, e1, 1)]) for b0, e0, b1, e1 in bounds
    ]

    # Prioritize on first dimension
    first_dim_pass = helpers.find_contiguous_subsets(subset_list, 0)
    first_dim_merged = helpers.find_contiguous_subsets(first_dim_pass, None)
    assert len(first_dim_merged) == 2

    # Prioritize on second dimension
    second_dim_pass = helpers.find_contiguous_subsets(subset_list, 1)
    assert len(second_dim_pass) == 3
    second_dim_merged = helpers.find_contiguous_subsets(second_dim_pass, None)
    assert len(second_dim_merged) == 3
def can_be_applied(sdfg: SDFG, subgraph: SubgraphView) -> bool:
    '''
    Decide whether the outermost-scope maps of ``subgraph`` can be fused.

    Fusible if
    1. Maps have the same access sets and ranges in order
    2. Any nodes in between two maps are AccessNodes only, without WCR
       There is at most one AccessNode only on a path between two maps,
       no other nodes are allowed
    3. The exiting memlets' subsets to an intermediate edge must cover
       the respective incoming memlets' subset into the next map.
       Also, as a limitation, the union of all exiting memlets'
       subsets must be contiguous.

    :param sdfg: SDFG that contains the subgraph.
    :param subgraph: Subgraph view whose maps are candidates for fusion.
    :return: True if all checks pass, False otherwise.
    :raises NotImplementedError: If an intermediate node has an incoming
                                 edge that does not originate from one of
                                 the map exits under consideration.
    '''
    # get graph
    graph = subgraph.graph
    for node in subgraph.nodes():
        if node not in graph.nodes():
            return False

    # next, get all the maps
    map_entries = helpers.get_outermost_scope_maps(sdfg, graph, subgraph)
    map_exits = [graph.exit_node(map_entry) for map_entry in map_entries]
    maps = [map_entry.map for map_entry in map_entries]

    # 1. basic checks:
    # 1.1 we need to have at least two maps
    if len(maps) <= 1:
        return False

    # 1.2 (not implemented) Special case: if a valid permutation could be
    #     established, check 1.3 could be skipped.

    # 1.3 check whether all maps are the same
    base_map = maps[0]
    for candidate in maps:
        if candidate.get_param_num() != base_map.get_param_num():
            return False
        if not all(p1 == p2
                   for (p1, p2) in zip(candidate.params, base_map.params)):
            return False
        if not candidate.range == base_map.range:
            return False

    # 1.3 check whether all map entries have the same schedule
    schedule = map_entries[0].schedule
    if not all(entry.schedule == schedule for entry in map_entries):
        return False

    # 2. check intermediate feasibility
    # see map_fusion.py for similar checks
    # with the restrictions below being more relaxed

    # 2.1 do some preparation work first:
    # calculate all out_nodes and intermediate_nodes
    # definition see in apply()
    node_config = SubgraphFusion.get_adjacent_nodes(sdfg, graph, map_entries)
    _, intermediate_nodes, out_nodes = node_config

    # 2.2 topological feasibility:
    if not SubgraphFusion.check_topo_feasibility(
            sdfg, graph, map_entries, intermediate_nodes, out_nodes):
        return False

    # 2.3 memlet feasibility
    # For each intermediate node, look at whether inner adjacent
    # memlets of the exiting map cover inner adjacent memlets
    # of the next entering map.
    # We also check for any WCRs on the fly.
    for node in intermediate_nodes:
        upper_subsets = set()
        lower_subsets = set()

        # First, determine which dimensions of the memlet ranges
        # change with the map, we do not need to care about the other
        # dimensions.
        try:
            dims_to_discard = SubgraphFusion.get_invariant_dimensions(
                sdfg, graph, map_entries, map_exits, node)
        except NotImplementedError:
            return False

        # find upper_subsets
        for in_edge in graph.in_edges(node):
            # second-to-last edge of the memlet path that ends in in_edge
            in_in_edge = graph.memlet_path(in_edge)[-2]

            # first check for WCRs
            if in_edge.data.wcr:
                # check whether the WCR is actually produced at
                # this edge or further up in the memlet path. If not,
                # we can still fuse!
                subset_params = {
                    str(s) for s in in_in_edge.data.subset.free_symbols
                }
                if any(p not in subset_params
                       for p in in_edge.src.map.params):
                    return False

            if in_edge.src in map_exits:
                # pick the subset that refers to the intermediate array
                subset_to_add = dcpy(
                    in_in_edge.data.subset
                    if in_in_edge.data.data == node.data
                    else in_in_edge.data.other_subset)
                subset_to_add.pop(dims_to_discard)
                upper_subsets.add(subset_to_add)
            else:
                raise NotImplementedError(
                    "Nodes between two maps to be fused with *incoming* "
                    "edges from outside the maps are not allowed yet.")

        # find lower_subsets
        for out_edge in graph.out_edges(node):
            if out_edge.dst in map_entries:
                # cannot use memlet tree here as there could be
                # not just one map succeeding. Do it manually
                for oedge in graph.out_edges(out_edge.dst):
                    # match the entry's outgoing connector to the incoming
                    # one by comparing the names with their first 3 and
                    # first 2 characters stripped, respectively
                    if oedge.src_conn[3:] == out_edge.dst_conn[2:]:
                        subset_to_add = dcpy(
                            oedge.data.subset
                            if oedge.data.data == node.data
                            else oedge.data.other_subset)
                        subset_to_add.pop(dims_to_discard)
                        lower_subsets.add(subset_to_add)

        # We assume that upper_subsets are contiguous
        # Check for this.
        # NOTE(review): find_contiguous_subsets is called unqualified here,
        # presumably imported at module level -- confirm.
        try:
            contiguous_upper = find_contiguous_subsets(upper_subsets)
            if len(contiguous_upper) > 1:
                return False
        except TypeError:
            warnings.warn(
                'Could not determine whether subset is continuous. '
                'Exiting Check with False.')
            return False

        # now take union of upper subsets
        upper_iter = iter(upper_subsets)
        union_upper = next(upper_iter)
        for subs in upper_iter:
            union_upper = subsets.union(union_upper, subs)
            if not union_upper:
                # something went wrong using union -- we'd rather abort
                return False

        # finally check coverage
        # every lower subset must be completely covered by union_upper
        for lower_subset in lower_subsets:
            if not union_upper.covers(lower_subset):
                return False

    return True
def apply(self, sdfg: sd.SDFG):
    """Route duplicated reads from one map-entry connector through transients.

    All edges leaving the matched map entry through the first connector
    shared by both matched nodes are grouped by the contiguous subset that
    covers them. For every such subset a temporary transient is created,
    filled by a single edge from the map entry, and the original edges are
    re-created from that transient with their memlets renamed and offset.

    :param sdfg: The SDFG being transformed; new transients are added to it.
    :raises ValueError: If an edge's subset is not covered by any of the
                        computed contiguous subsets.
    """
    graph: sd.SDFGState = sdfg.nodes()[self.state_id]
    map_entry = graph.node(self.subgraph[DeduplicateAccess._map_entry])
    node1 = graph.node(self.subgraph[DeduplicateAccess._node1])
    node2 = graph.node(self.subgraph[DeduplicateAccess._node2])

    # Steps:
    # 1. Find unique subsets
    # 2. Find sets of contiguous subsets
    # 3. Create transients for subsets
    # 4. Redirect edges through new transients

    edges1 = set(e.src_conn for e in graph.edges_between(map_entry, node1))
    edges2 = set(e.src_conn for e in graph.edges_between(map_entry, node2))

    # Only apply to first connector (determinism)
    conn = sorted(edges1 & edges2)[0]

    edges = [e for e in graph.out_edges(map_entry) if e.src_conn == conn]

    # Get original data descriptor
    dname = edges[0].data.data
    desc = sdfg.arrays[dname]

    # Get unique subsets
    unique_subsets = set(e.data.subset for e in edges)

    # Find largest contiguous subsets
    try:
        # Start from stride-1 dimension; next() raises StopIteration when
        # the descriptor has no stride equal to 1
        contiguous_subsets = helpers.find_contiguous_subsets(
            unique_subsets,
            dim=next(i for i, s in enumerate(desc.strides) if s == 1))
    except (StopIteration, NotImplementedError):
        warnings.warn(
            "DeduplicateAccess::Not operating on Stride One Dimension!")
        contiguous_subsets = unique_subsets

    # Then find subsets for rest of the dimensions. Materialize the result
    # so that the indices used in both loops below refer to the same subset.
    contiguous_subsets = list(
        helpers.find_contiguous_subsets(contiguous_subsets))

    # Map original edges to subsets
    edge_mapping = defaultdict(list)
    for e in edges:
        for ind, subset in enumerate(contiguous_subsets):
            if subset.covers(e.data.subset):
                edge_mapping[ind].append(e)
                break
        else:
            # no subset covered this edge
            raise ValueError(
                "Failed to find contiguous subset for edge %s" % e.data)

    # Create transients for subsets and redirect edges
    for ind, subset in enumerate(contiguous_subsets):
        name, _ = sdfg.add_temp_transient(subset.size(), desc.dtype)
        anode = graph.add_access(name)
        graph.add_edge(map_entry, conn, anode, None,
                       Memlet(data=dname, subset=subset))
        for e in edge_mapping[ind]:
            graph.remove_edge(e)
            new_memlet = copy.deepcopy(e.data)
            new_edge = graph.add_edge(anode, None, e.dst, e.dst_conn,
                                      new_memlet)
            for pe in graph.memlet_tree(new_edge):
                # Rename data on memlet
                pe.data.data = name
                # Offset memlets to match new transient
                pe.data.subset.offset(subset, True)