def create_profile(name, iet):
    """
    Enrich the Iteration/Expression tree ``iet`` adding nodes for C-level
    performance profiling. In particular, turn all :class:`Section`s within
    ``iet`` into :class:`TimedList`s.

    A :class:`Profiler` is returned to access profiling data.
    """
    profiler = Profiler(name)

    sections = FindNodes(Section).visit(iet)
    for section in sections:
        # All ExpressionBundles within `section`
        bundles = FindNodes(ExpressionBundle).visit(section)

        # Total operation count
        ops = sum(b.ops for b in bundles)

        # Operation count at each section iteration
        sops = sum(estimate_cost(e.expr)
                   for e in flatten(b.exprs for b in bundles))

        # Total memory traffic: group the per-bundle traffic by accessed
        # Function, merge each group, then sum the extents
        grouped = {}
        for b in bundles:
            for func, accesses in b.traffic.items():
                grouped.setdefault(func, []).append(accesses)
        merged = [IntervalGroup.generate('merge', *v) for v in grouped.values()]
        traffic = sum(ig.extent for ig in merged)

        # Each ExpressionBundle lives in its own iteration space
        itershapes = [b.shape for b in bundles]

        # Track how many grid points are written within `section`
        points = 0
        for b in bundles:
            written = {e.write for e in b.exprs
                       if e.is_tensor and e.write.is_TimeFunction}
            points += reduce(mul, b.shape) * len(written)

        profiler.add(section, SectionData(ops, sops, points, traffic,
                                          itershapes))

    # Transform the Iteration/Expression tree introducing the C-level timers
    subs = {s: TimedList(gname=name, lname=s.name, body=s.body)
            for s in sections}
    iet = Transformer(subs).visit(iet)

    return iet, profiler
def instrument(self, iet):
    """
    Enrich the Iteration/Expression tree ``iet`` adding nodes for C-level
    performance profiling. In particular, turn all Sections within ``iet``
    into TimedLists.
    """
    sections = FindNodes(Section).visit(iet)
    for section in sections:
        bundles = FindNodes(ExpressionBundle).visit(section)

        # Total operation count
        ops = sum(b.ops for b in bundles)

        # Operation count at each section iteration
        sops = sum(estimate_cost(e.expr)
                   for e in flatten(b.exprs for b in bundles))

        # Total memory traffic: collect the per-bundle traffic by accessed
        # Function, then try to union the accesses of each Function
        per_function = {}
        for b in bundles:
            for func, accesses in b.traffic.items():
                per_function.setdefault(func, []).append(accesses)
        traffic = 0
        for accesses in per_function.values():
            try:
                traffic += IntervalGroup.generate('union', *accesses).size
            except ValueError:
                # Over different iteration spaces
                traffic += sum(a.size for a in accesses)

        # Each ExpressionBundle lives in its own iteration space
        itermaps = [b.ispace.dimension_map for b in bundles]

        # Track how many grid points are written within `section`
        points = 0
        for b in bundles:
            written = {e.write for e in b.exprs
                       if e.is_tensor and e.write.is_TimeFunction}
            points += b.size * len(written)

        self._sections[section] = SectionData(ops, sops, points, traffic,
                                              itermaps)

    # Transform the Iteration/Expression tree introducing the C-level timers
    subs = {s: TimedList(timer=self.timer, lname=s.name, body=s)
            for s in sections}
    iet = Transformer(subs).visit(iet)

    return iet
def analyze(self, iet):
    """
    Analyze the Sections in the given IET. This populates `self._sections`.
    """
    for section in FindNodes(Section).visit(iet):
        # Don't re-analyze a Section seen in a previous call
        if section.name in self._sections:
            continue

        bundles = FindNodes(ExpressionBundle).visit(section)

        # Total operation count
        ops = sum(b.ops * b.ispace.size for b in bundles)

        # Operation count at each section iteration
        sops = sum(b.ops for b in bundles)

        # Total memory traffic: collect the per-bundle traffic by accessed
        # Function, then try to union the accesses of each Function
        per_function = {}
        for b in bundles:
            for func, accesses in b.traffic.items():
                per_function.setdefault(func, []).append(accesses)
        traffic = 0
        for accesses in per_function.values():
            try:
                traffic += IntervalGroup.generate('union', *accesses).size
            except ValueError:
                # Over different iteration spaces
                traffic += sum(a.size for a in accesses)

        # Each ExpressionBundle lives in its own iteration space
        itermaps = [b.ispace.dimension_map for b in bundles]

        # Track how many grid points are written within the Section
        points = 0
        for b in bundles:
            written = {e.write for e in b.exprs
                       if e.is_tensor and e.write.is_TimeFunction}
            points += b.size * len(written)

        self._sections[section.name] = SectionData(ops, sops, points,
                                                   traffic, itermaps)
def instrument(self, iet):
    """
    Enrich the Iteration/Expression tree ``iet`` adding nodes for C-level
    performance profiling. In particular, turn all :class:`Section`s within
    ``iet`` into :class:`TimedList`s.
    """
    sections = FindNodes(Section).visit(iet)
    for section in sections:
        bundles = FindNodes(ExpressionBundle).visit(section)

        # Total operation count
        ops = sum(b.ops for b in bundles)

        # Operation count at each section iteration
        sops = sum(estimate_cost(e.expr)
                   for e in flatten(b.exprs for b in bundles))

        # Total memory traffic: group the per-bundle traffic by accessed
        # Function, merge each group, then sum the sizes
        grouped = {}
        for b in bundles:
            for func, accesses in b.traffic.items():
                grouped.setdefault(func, []).append(accesses)
        merged = [IntervalGroup.generate('merge', *v) for v in grouped.values()]
        traffic = sum(ig.size for ig in merged)

        # Each ExpressionBundle lives in its own iteration space
        itershapes = [b.shape for b in bundles]

        # Track how many grid points are written within `section`
        points = 0
        for b in bundles:
            written = {e.write for e in b.exprs
                       if e.is_tensor and e.write.is_TimeFunction}
            points += reduce(mul, b.shape) * len(written)

        self._sections[section] = SectionData(ops, sops, points, traffic,
                                              itershapes)

    # Transform the Iteration/Expression tree introducing the C-level timers
    subs = {s: TimedList(timer=self.timer, lname=s.name, body=s)
            for s in sections}
    iet = Transformer(subs).visit(iet)

    return iet
def dspace(self):
    """
    Derive the DataSpace of the Cluster from its expressions, IterationSpace,
    and Guards.

    The result pairs a per-Function projection (`parts`) with a global,
    Dimension-centric view (`intervals`) of the data space.
    """
    # Map each accessed Function to the offsets at which it is accessed
    accesses = detect_accesses(self.exprs)

    # Construct the `parts` of the DataSpace, that is a projection of the data
    # space for each Function appearing in `self.exprs`
    parts = {}
    for f, v in accesses.items():
        if f is None:
            # detect_accesses may produce a Function-less entry; skip it
            continue

        # One Interval per Dimension, spanning the min/max accessed offsets
        intervals = [Interval(d, min(offs), max(offs))
                     for d, offs in v.items()]
        intervals = IntervalGroup(intervals)

        # Factor in the IterationSpace -- if the min/max points aren't zero,
        # then the data intervals need to shrink/expand accordingly
        intervals = intervals.promote(lambda d: d.is_Block)
        shift = self.ispace.intervals.promote(lambda d: d.is_Block)
        intervals = intervals.add(shift)

        # Map SubIterators to the corresponding data space Dimension
        # E.g., `xs -> x -> x0_blk0 -> x` or `t0 -> t -> time`
        intervals = intervals.promote(lambda d: d.is_SubIterator)

        # If the bound of a Dimension is explicitly guarded, then we should
        # shrink the `parts` accordingly
        for d, v in self.guards.items():
            ret = v.find(BaseGuardBoundNext)
            # At most one bound-guard per Dimension is expected here
            assert len(ret) <= 1
            if len(ret) != 1:
                continue
            if ret.pop().direction is Forward:
                # Guarded forward iteration: drop the last point
                intervals = intervals.translate(d, v1=-1)
            else:
                # Backward iteration: shift the lower bound instead
                intervals = intervals.translate(d, 1)

        # Special case: if the factor of a ConditionalDimension has value 1,
        # then we can safely resort to the parent's Interval
        intervals = intervals.promote(lambda d: d.is_Conditional and
                                      d.factor == 1)

        parts[f] = intervals

    # Determine the Dimensions requiring shifted min/max points to avoid
    # OOB accesses
    oobs = set()
    for f, v in parts.items():
        for i in v:
            if i.dim.is_Sub:
                # Check the OOB condition against the parent Dimension
                d = i.dim.parent
            else:
                d = i.dim
            try:
                if i.lower < 0 or \
                   i.upper > f._size_nodomain[d].left + f._size_halo[d].right:
                    # It'd mean trying to access a point before the
                    # left halo (test0) or after the right halo (test1)
                    oobs.update(d._defines)
            except (KeyError, TypeError):
                # Unable to detect presence of OOB accesses (e.g., `d` not in
                # `f._size_halo`, that is typical of indirect accesses
                # `A[B[i]]`)
                pass

    # Construct the `intervals` of the DataSpace, that is a global,
    # Dimension-centric view of the data space
    intervals = IntervalGroup.generate('union', *parts.values())

    # E.g., `db0 -> time`, but `xi NOT-> x`
    intervals = intervals.promote(lambda d: not d.is_Sub)
    # Zero the Intervals of every Dimension not flagged as OOB-prone
    intervals = intervals.zero(set(intervals.dimensions) - oobs)

    return DataSpace(intervals, parts)