def get_out_memlet_costs(sdfg: dace.SDFG, state_id: int, node: nodes.Node, dfg: StateGraphView):
    """
    Sum the byte cost of the outgoing memlets of ``node`` that are accounted to it.

    An outgoing edge contributes only if ``node`` is a ``nodes.CodeNode``, the
    last edge of its memlet path ends in a ``nodes.AccessNode``, and the
    memlet's subset reports ``data_dims() == 0``.

    :param sdfg: SDFG passed through to ``PAPIUtils.get_memlet_byte_size``;
                 ``sdfg.node(state_id)`` supplies the scope dictionary.
    :param state_id: Index used to look up the state in ``sdfg``.
    :param node: Node whose outgoing edges are inspected.
    :param dfg: Graph view providing ``out_edges`` and ``memlet_path``.
    :return: Accumulated cost, or ``0`` as soon as a qualifying edge has an
             empty second tuple field (presumably the source connector).
    """
    scopes = sdfg.node(state_id).scope_dict()

    total_cost = 0
    for out_edge in dfg.out_edges(node):
        _, src_conn, _, _, memlet = out_edge
        final_dst = dfg.memlet_path(out_edge)[-1].dst

        # Only code-node -> access-node paths are costed here.
        if not (isinstance(node, nodes.CodeNode) and isinstance(final_dst, nodes.AccessNode)):
            continue

        # A memlet pointing into an array in an inner scope is handled by
        # that inner scope, so it is skipped here.
        if scopes[node] != scopes[final_dst] and scope_contains_scope(scopes, node, final_dst):
            continue

        if not src_conn:
            # This would normally raise a syntax error.
            # NOTE(review): this discards any cost accumulated from earlier
            # edges -- confirm that is intended.
            return 0

        if memlet.subset.data_dims() != 0:
            continue

        is_reduction = memlet.wcr is not None
        byte_size = PAPIUtils.get_memlet_byte_size(sdfg, memlet)
        if is_reduction:
            # Write-conflict resolution: assume every reduction costs three
            # accesses of the same size (read old, read new, write).
            total_cost += 3 * byte_size
        else:
            # NOTE(review): the previous comment claimed this plain write is
            # "already counted", yet its size is still added -- confirm.
            total_cost += byte_size

    return total_cost
def has_surrounding_perfcounters(node, dfg: StateGraphView):
    """
    Tell whether ``node`` may be part of a section that is profiled.

    :param node: Node whose enclosing scope entry is looked up.
    :param dfg: Graph view providing ``entry_node``.
    :return: True only when the entry node of ``node`` is a ``MapEntry``
             whose map schedule is listed in
             ``PAPIInstrumentation.perf_whitelist_schedules``; False otherwise.
    """
    enclosing_entry = dfg.entry_node(node)
    return (isinstance(enclosing_entry, MapEntry)
            and enclosing_entry.map.schedule in PAPIInstrumentation.perf_whitelist_schedules)
def accumulate_byte_movement(outermost_node, node, dfg: StateGraphView, sdfg, state_id):
    """
    Recursively accumulate the byte movement below ``node``.

    A ``MapEntry`` with scope children returns the sum of this function over
    those children. A leaf ``Tasklet`` returns its byte accesses (from
    ``PAPIUtils.get_tasklet_byte_accesses``) multiplied by the product of the
    values collected through ``PAPIUtils.get_iteration_count`` over the
    parents reported by ``PAPIUtils.get_parents``. A ``MapExit`` contributes 0.

    :param outermost_node: Forwarded to ``PAPIUtils.get_parents`` and to the
                           recursive calls.
    :param node: Node to evaluate.
    :param dfg: Graph view providing ``scope_children``.
    :param sdfg: Forwarded to the ``PAPIUtils`` helpers.
    :param state_id: Forwarded to the ``PAPIUtils`` helpers.
    :return: ``0`` for a ``MapExit``, otherwise the accumulated (possibly
             symbolic) byte count.
    :raises ValueError: If ``node`` is a ``MapEntry`` without scope children.
    :raises NotImplementedError: If a leaf node is neither a ``MapExit`` nor
                                 a ``Tasklet``.
    """
    # Only map entries are looked up in the scope-children table.
    children = dfg.scope_children()[node] if isinstance(node, MapEntry) else []
    assert node not in children

    # If there are children, descend recursively (DFS is fine here).
    if children:
        return sum(
            PAPIUtils.accumulate_byte_movement(outermost_node, child, dfg, sdfg, state_id) for child in children)

    # Leaf handling: settle the node type first, so no parent or iteration
    # work is done for nodes that contribute nothing or are rejected anyway.
    if isinstance(node, MapExit):
        return 0  # We can ignore this.
    if isinstance(node, MapEntry):
        # A map entry should never be the innermost node.
        raise ValueError("Unexpected node")
    if not isinstance(node, Tasklet):
        raise NotImplementedError

    # From all enclosing iterations, get the iteration count, replacing inner
    # iteration variables with the next outer variables.
    itvars = {}
    for parent in PAPIUtils.get_parents(outermost_node, node, sdfg, state_id):
        itvars = PAPIUtils.get_iteration_count(parent, itvars)

    itcount = 1
    for extent in itvars.values():
        itcount = itcount * extent

    return itcount * symbolic.pystr_to_symbolic(PAPIUtils.get_tasklet_byte_accesses(node, dfg, sdfg, state_id))
def get_tasklet_byte_accesses(tasklet: nodes.CodeNode, dfg: StateGraphView, sdfg: dace.SDFG, state_id: int) -> str:
    """
    Get the amount of bytes processed by ``tasklet``.

    The result is the byte size of every incoming memlet (via
    ``PAPIUtils.get_memlet_byte_size``) plus the outgoing cost reported by
    ``PAPIUtils.get_out_memlet_costs``.

    :param tasklet: Node whose incoming and outgoing edges are costed.
    :param dfg: Graph view providing ``in_edges``.
    :param sdfg: Forwarded to the ``PAPIUtils`` helpers.
    :param state_id: Forwarded to ``PAPIUtils.get_out_memlet_costs``.
    :return: The summed expression rendered by ``sym2cpp`` and wrapped in
             parentheses.
    """
    # Input terms first, then the single aggregated output term, so the
    # summation order matches the edge order followed by the output cost.
    byte_terms = [PAPIUtils.get_memlet_byte_size(sdfg, in_edge.data) for in_edge in dfg.in_edges(tasklet)]
    byte_terms.append(PAPIUtils.get_out_memlet_costs(sdfg, state_id, tasklet, dfg))

    total_bytes = sum(byte_terms)
    return "(" + sym2cpp(total_bytes) + ")"