def _drop_halospots(iet):
    """
    Remove HaloSpots that:

        * Embed SEQUENTIAL Iterations
        * Would be used to compute Increments (in which case, a halo exchange is
          actually unnecessary)
    """
    mapper = defaultdict(set)

    # If a HaloSpot Dimension turns out to be SEQUENTIAL, then the HaloSpot is useless
    for hs, iterations in MapNodes(HaloSpot, Iteration).visit(iet).items():
        if any(i.is_Sequential for i in iterations if i.dim.root in hs.dimensions):
            mapper[hs].update(set(hs.functions))

    # If all HaloSpot reads pertain to increments, then the HaloSpot is useless
    for hs, expressions in MapNodes(HaloSpot, Expression).visit(iet).items():
        scope = Scope([i.expr for i in expressions])
        for f in hs.fmapper:
            if all(i.is_increment for i in scope.reads.get(f, [])):
                mapper[hs].add(f)

    # Transform the IET introducing the "reduced" HaloSpots
    subs = {hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(mapper[hs]))
            for hs in FindNodes(HaloSpot).visit(iet)}
    iet = Transformer(subs, nested=True).visit(iet)

    return iet
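
# A minimal, self-contained sketch of the analyze-then-transform pattern used by
# `_drop_halospots`: first collect, per node, the functions whose halo exchange
# is redundant, then rebuild every node in one sweep. `ToyHaloSpot` and
# `toy_transform` are hypothetical stand-ins for HaloSpot, HaloScheme.drop and
# Transformer, not Devito API.
from collections import defaultdict


class ToyHaloSpot:

    def __init__(self, name, functions):
        self.name = name
        self.functions = frozenset(functions)

    def drop(self, functions):
        # Analogue of `hs._rebuild(halo_scheme=hs.halo_scheme.drop(...))`
        return ToyHaloSpot(self.name, self.functions - functions)


def toy_transform(nodes, subs):
    # Analogue of `Transformer(subs).visit(iet)`: replace nodes as per `subs`
    return [subs.get(n, n) for n in nodes]


hs0 = ToyHaloSpot('hs0', {'u', 'v'})
hs1 = ToyHaloSpot('hs1', {'w'})

mapper = defaultdict(set)
mapper[hs0].add('v')  # analysis phase: e.g., `v`'s reads are increment-only

subs = {hs: hs.drop(mapper[hs]) for hs in [hs0, hs1]}
print([sorted(hs.functions) for hs in toy_transform([hs0, hs1], subs)])
# [['u'], ['w']]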
def mark_halospot_useless(analysis):
    """
    Update ``analysis`` detecting the ``useless`` HaloSpots within ``analysis.iet``.
    """
    properties = OrderedDict()

    # If a HaloSpot Dimension turns out to be SEQUENTIAL, then the HaloSpot is useless
    for hs, iterations in MapNodes(HaloSpot, Iteration).visit(analysis.iet).items():
        if any(SEQUENTIAL in analysis.properties[i]
               for i in iterations if i.dim.root in hs.dimensions):
            properties[hs] = useless(hs.functions)

    # If a Function is never written to, or if all HaloSpot reads pertain to an
    # increment expression, then the HaloSpot is useless
    for tree in analysis.trees:
        scope = analysis.scopes[tree.root]
        for hs, v in MapNodes(HaloSpot).visit(tree.root).items():
            if hs in properties:
                continue
            found = []
            for f in hs.fmapper:
                test0 = not scope.writes.get(f)
                test1 = (all(i.is_Expression for i in v) and
                         all(r.is_increment
                             for r in Scope([i.expr for i in v]).reads[f]))
                if test0 or test1:
                    found.append(f)
            if found:
                properties[hs] = useless(tuple(found))

    analysis.update(properties)
def _hoist_halospots(iet):
    """
    Hoist HaloSpots from inner to outer Iterations where all data dependencies
    would be honored.
    """
    # Precompute scopes to save time
    scopes = {i: Scope([e.expr for e in v]) for i, v in MapNodes().visit(iet).items()}

    # Analysis
    hsmapper = {}
    imapper = defaultdict(list)
    for iters, halo_spots in MapNodes(Iteration, HaloSpot, 'groupby').visit(iet).items():
        for hs in halo_spots:
            hsmapper[hs] = hs.halo_scheme

            for f in hs.fmapper:
                for n, i in enumerate(iters):
                    maybe_hoistable = set().union(*[j.dim._defines for j in iters[n:]])
                    d_flow = scopes[i].d_flow.project(f)

                    if all(not (dep.cause & maybe_hoistable) or dep.write.is_increment
                           for dep in d_flow):
                        hsmapper[hs] = hsmapper[hs].drop(f)
                        imapper[i].append(hs.halo_scheme.project(f))
                        break

    # Post-process analysis
    mapper = {i: HaloSpot(HaloScheme.union(hss), i._rebuild())
              for i, hss in imapper.items()}
    mapper.update({i: i.body if hs.is_void else i._rebuild(halo_scheme=hs)
                   for i, hs in hsmapper.items()})

    # Transform the IET hoisting/dropping HaloSpots according to the analysis
    iet = Transformer(mapper, nested=True).visit(iet)

    # Clean up: de-nest HaloSpots if necessary
    mapper = {}
    for hs in FindNodes(HaloSpot).visit(iet):
        if hs.body.is_HaloSpot:
            halo_scheme = HaloScheme.union([hs.halo_scheme, hs.body.halo_scheme])
            mapper[hs] = hs._rebuild(halo_scheme=halo_scheme, body=hs.body.body)
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet
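
# A toy version of the de-nesting cleanup at the end of `_hoist_halospots`:
# whenever a HaloSpot's body is itself a HaloSpot, the two are fused by taking
# the union of their halo schemes. `ToyHS` is a hypothetical stand-in for the
# real HaloSpot/HaloScheme pair; HaloScheme.union is modeled as a set union.
class ToyHS:

    def __init__(self, scheme, body):
        self.scheme = frozenset(scheme)
        self.body = body


def denest(hs):
    while isinstance(hs.body, ToyHS):
        hs = ToyHS(hs.scheme | hs.body.scheme, hs.body.body)
    return hs


nested = ToyHS({'u'}, ToyHS({'v'}, '<computation>'))
flat = denest(nested)
print(sorted(flat.scheme), flat.body)  # ['u', 'v'] <computation>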
def track_subsections(iet, **kwargs):
    """
    Add custom Sections to the `profiler`. Custom Sections include:

        * MPI Calls (e.g., HaloUpdateCall and HaloUpdateWait)
        * Busy-waiting on While(lock) (e.g., from host-device orchestration)
    """
    profiler = kwargs['profiler']
    sregistry = kwargs['sregistry']

    name_mapper = {
        HaloUpdateCall: 'haloupdate',
        HaloWaitCall: 'halowait',
        RemainderCall: 'remainder',
        HaloUpdateList: 'haloupdate',
        HaloWaitList: 'halowait',
        BusyWait: 'busywait'
    }

    mapper = {}
    for NodeType in [MPIList, MPICall, BusyWait]:
        for k, v in MapNodes(Section, NodeType).visit(iet).items():
            for i in v:
                if i in mapper or not any(issubclass(i.__class__, n)
                                          for n in profiler.trackable_subsections):
                    continue
                name = sregistry.make_name(prefix=name_mapper[i.__class__])
                mapper[i] = Section(name, body=i, is_subsection=True)
                profiler.track_subsection(k.name, name)

    iet = Transformer(mapper).visit(iet)

    return iet, {}
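
# A rough sketch of the behavior assumed of `sregistry.make_name(prefix=...)`
# above: one counter per prefix, yielding unique Section names such as
# `haloupdate0`, `haloupdate1`, ... This is a hypothetical reimplementation,
# not Devito's actual symbol registry.
from collections import Counter


class ToyNameRegistry:

    def __init__(self):
        self.counters = Counter()

    def make_name(self, prefix):
        name = '%s%d' % (prefix, self.counters[prefix])
        self.counters[prefix] += 1
        return name


sregistry = ToyNameRegistry()
print(sregistry.make_name(prefix='haloupdate'))  # haloupdate0
print(sregistry.make_name(prefix='haloupdate'))  # haloupdate1
print(sregistry.make_name(prefix='busywait'))    # busywait0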
def __init__(self, iet):
    self.iet = iet
    self.properties = OrderedDict()

    # Precompute the Iteration trees and one Scope per Iteration group; the
    # `mark_halospot_*` passes query these repeatedly
    self.trees = retrieve_iteration_tree(iet, mode='superset')
    self.scopes = OrderedDict([(k, Scope([i.expr for i in v]))
                               for k, v in MapNodes().visit(iet).items()])
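
# A minimal sketch of the same memoization idea, with a plain callable standing
# in for the (expensive) Scope construction; `precompute` and the toy `groups`
# mapping are hypothetical, not Devito API.
from collections import OrderedDict


def precompute(groups, build_scope):
    return OrderedDict((node, build_scope(exprs)) for node, exprs in groups.items())


groups = {'<Iteration x>': ['eq0', 'eq1'], '<Iteration y>': ['eq2']}
scopes = precompute(groups, build_scope=frozenset)
print(sorted(scopes['<Iteration x>']))  # ['eq0', 'eq1']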
def mark_halospot_useless(analysis):
    """
    Update ``analysis`` detecting the USELESS HaloSpots within ``analysis.iet``.
    """
    properties = OrderedDict()

    for hs, iterations in MapNodes(HaloSpot, Iteration).visit(analysis.iet).items():
        # `hs` is USELESS if ...

        # * ANY of its Dimensions turn out to be SEQUENTIAL
        if any(SEQUENTIAL in analysis.properties[i]
               for i in iterations if i.dim.root in hs.dimensions):
            properties[hs] = USELESS
            continue

        # * ALL reads pertain to an increment expression
        test = False
        scope = analysis.scopes[iterations[0]]
        for f in hs.fmapper:
            if any(not r.is_increment for r in scope.reads[f]):
                test = True
                break
        if not test:
            properties[hs] = USELESS

    analysis.update(properties)
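
# Toy illustration of the increment test in `mark_halospot_useless`: if, for
# every Function, all reads inside the HaloSpot are increments, the halo
# exchange is unnecessary and the HaloSpot is USELESS. `ToyRead` is a
# hypothetical stand-in for Devito's access objects.
class ToyRead:

    def __init__(self, is_increment):
        self.is_increment = is_increment


def all_increments(reads):
    return all(r.is_increment for r in reads)


print(all_increments([ToyRead(True), ToyRead(True)]))   # True -> USELESS
print(all_increments([ToyRead(True), ToyRead(False)]))  # False -> halo needed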
def mark_halospot_overlappable(analysis):
    """
    Update ``analysis`` detecting the OVERLAPPABLE HaloSpots within ``analysis.iet``.
    """
    properties = OrderedDict()

    for hs, iterations in MapNodes(HaloSpot, Iteration).visit(analysis.iet).items():
        # To be OVERLAPPABLE, all inner Iterations must be PARALLEL
        if all(PARALLEL in analysis.properties.get(i, ()) for i in iterations):
            properties[hs] = OVERLAPPABLE

    analysis.update(properties)
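
# The OVERLAPPABLE test in miniature: computation/communication overlap is only
# legal if every Iteration nested within the HaloSpot is PARALLEL. Plain strings
# and dicts stand in for the Devito properties machinery; hypothetical sketch.
props = {'<Iteration x>': {'PARALLEL'},
         '<Iteration y>': {'PARALLEL'},
         '<Iteration t>': {'SEQUENTIAL'}}


def is_overlappable(iterations):
    return all('PARALLEL' in props.get(i, ()) for i in iterations)


print(is_overlappable(['<Iteration x>', '<Iteration y>']))  # True
print(is_overlappable(['<Iteration t>', '<Iteration x>']))  # False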
def _merge_halospots(iet):
    """
    Merge HaloSpots on the same Iteration tree level where all data dependencies
    would be honored.
    """
    # Analysis
    mapper = {}
    for i, halo_spots in MapNodes(Iteration, HaloSpot, 'immediate').visit(iet).items():
        if i is None or len(halo_spots) <= 1:
            continue

        scope = Scope([e.expr for e in FindNodes(Expression).visit(i)])

        hs0 = halo_spots[0]
        mapper[hs0] = hs0.halo_scheme

        for hs in halo_spots[1:]:
            mapper[hs] = hs.halo_scheme

            for f in hs.fmapper:
                test = True
                for dep in scope.d_flow.project(f):
                    if not (dep.cause & set(hs.dimensions)):
                        continue
                    if dep.is_regular and all(not any(dep.read.touched_halo(c.root))
                                              for c in dep.cause):
                        continue
                    test = False
                    break
                if test:
                    mapper[hs0] = HaloScheme.union([mapper[hs0],
                                                    hs.halo_scheme.project(f)])
                    mapper[hs] = mapper[hs].drop(f)

    # Post-process analysis
    mapper = {i: i.body if hs.is_void else i._rebuild(halo_scheme=hs)
              for i, hs in mapper.items()}

    # Transform the IET merging/dropping HaloSpots according to the analysis
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet
def mark_halospot_hoistable(analysis):
    """
    Update ``analysis`` detecting the ``hoistable`` HaloSpots within ``analysis.iet``.
    """
    properties = OrderedDict()
    for i, halo_spots in MapNodes(Iteration, HaloSpot).visit(analysis.iet).items():
        for hs in halo_spots:
            if hs in properties:
                # Already went through this HaloSpot
                continue

            found = []
            scope = analysis.scopes[i]
            for f, hse in hs.fmapper.items():
                # A sufficient condition for `f`'s halo-update to be `hoistable`
                # is that there are no `hs.dimensions`-induced flow-dependences
                # touching the halo
                test = True
                for dep in scope.d_flow.project(f):
                    test = not (dep.cause & set(hs.dimensions))
                    if test:
                        continue

                    test = dep.write.is_increment
                    if test:
                        continue

                    test = all(not any(dep.read.touched_halo(c.root))
                               for c in dep.cause)
                    if test:
                        continue

                    # `dep` is indeed a flow-dependence touching the halo of a
                    # distributed Dimension, so we must assume it's non-hoistable
                    break

                if test:
                    found.append(f)

            if found:
                properties[hs] = hoistable(tuple(found))

    analysis.update(properties)
def mark_halospot_hoistable(analysis):
    """
    Update ``analysis`` detecting the HOISTABLE HaloSpots within ``analysis.iet``.
    """
    properties = OrderedDict()
    for i, halo_spots in MapNodes(Iteration, HaloSpot).visit(analysis.iet).items():
        for hs in halo_spots:
            if hs in properties:
                # Already went through this HaloSpot, so save some analysis time
                continue

            # A sufficient condition for a Function's halo-update to be
            # `hoistable` is that there are no anti-dependences on it in the
            # entire scope.
            # TODO: This condition could be relaxed by considering smaller
            # sections of the scope
            found = [f for f in hs.fmapper
                     if not analysis.scopes[i].d_anti.project(f)]

            if found:
                properties[hs] = hoistable(tuple(found))

    analysis.update(properties)
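
# The sufficient condition of the variant above, in isolation: a Function is
# hoistable if projecting the scope's anti-dependences onto it yields nothing.
# A dict of lists stands in for `scope.d_anti.project(f)`; hypothetical sketch,
# not Devito API.
def find_hoistable(functions, anti_deps):
    return tuple(f for f in functions if not anti_deps.get(f))


anti_deps = {'u': ['R<u,[t0,x+1]> -> W<u,[t1,x]>']}  # `u` has an anti-dependence
print(find_hoistable(['u', 'v'], anti_deps))  # ('v',)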
def _optimize_halospots(self, iet):
    """
    Optimize the HaloSpots in ``iet``.

        * Remove all ``useless`` HaloSpots;
        * Merge all ``hoistable`` HaloSpots with their root HaloSpot, thus
          removing redundant communications and anticipating communications
          that will be required by later Iterations.
    """
    # Drop `useless` HaloSpots
    mapper = {hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(hs.useless))
              for hs in FindNodes(HaloSpot).visit(iet)}
    iet = Transformer(mapper, nested=True).visit(iet)

    # Handle `hoistable` HaloSpots
    # First, we merge `hoistable` HaloSpots together, to anticipate communications
    mapper = {}
    for tree in retrieve_iteration_tree(iet):
        halo_spots = FindNodes(HaloSpot).visit(tree.root)
        if not halo_spots:
            continue
        root = halo_spots[0]
        if root in mapper:
            continue
        hss = [root.halo_scheme]
        hss.extend([hs.halo_scheme.project(hs.hoistable) for hs in halo_spots[1:]])
        try:
            mapper[root] = root._rebuild(halo_scheme=HaloScheme.union(hss))
        except ValueError:
            # HaloSpots have non-matching `loc_indices` and therefore can't be merged
            warning("Found hoistable HaloSpots with disjoint loc_indices, "
                    "skipping optimization")
            continue
        for hs in halo_spots[1:]:
            halo_scheme = hs.halo_scheme.drop(hs.hoistable)
            if halo_scheme.is_void:
                mapper[hs] = hs.body
            else:
                mapper[hs] = hs._rebuild(halo_scheme=halo_scheme)
    iet = Transformer(mapper, nested=True).visit(iet)

    # Then, we make sure the halo exchanges get performed *before* the first
    # distributed Dimension. Again, we do this to anticipate communications,
    # which hopefully pays off in performance
    #
    # <Iteration x>                    <HaloSpot(u)>, in y
    #   <HaloSpot(u)>, in y    ---->     <Iteration x>
    #     <Iteration y>                    <Iteration y>
    mapper = {}
    for i, halo_spots in MapNodes(Iteration, HaloSpot).visit(iet).items():
        hoistable = [hs for hs in halo_spots if hs.hoistable]
        if not hoistable:
            continue
        elif len(hoistable) > 1:
            # We should never end up here, but for now we can't prove it formally
            warning("Found multiple hoistable HaloSpots, skipping optimization")
            continue
        hs = hoistable.pop()
        if hs in mapper:
            continue
        if i.dim.root in hs.dimensions:
            halo_scheme = hs.halo_scheme.drop(hs.hoistable)
            if halo_scheme.is_void:
                mapper[hs] = hs.body
            else:
                mapper[hs] = hs._rebuild(halo_scheme=halo_scheme)

            halo_scheme = hs.halo_scheme.project(hs.hoistable)
            mapper[i] = hs._rebuild(halo_scheme=halo_scheme, body=i._rebuild())
    iet = Transformer(mapper, nested=True).visit(iet)

    # Finally, we try to move HaloSpot-free Iteration nests within HaloSpot
    # subtrees, to overlap as much computation as possible. The HaloSpot-free
    # Iteration nests must be fully affine, otherwise we wouldn't be able to
    # honour the data dependences along the halo
    #
    # <HaloSpot(u,v)>            <HaloSpot(u,v)>
    #   <A>              ---->     <A>
    # <B>               affine?    <B>
    #
    # Here, <B> doesn't require any halo exchange, but it might still need the
    # output of <A>; thus, if we do computation/communication overlap over <A>
    # *and* want to embed <B> within the HaloSpot, then <B>'s iteration space
    # will have to be split as well. For this, <B> must be affine.
    mapper = {}
    for v in FindAdjacent((HaloSpot, Iteration)).visit(iet).values():
        for g in v:
            root = None
            for i in g:
                if i.is_HaloSpot:
                    root = i
                    mapper[root] = [root.body]
                elif root and all(j.is_Affine for j in FindNodes(Iteration).visit(i)):
                    mapper[root].append(i)
                    mapper[i] = None
                else:
                    root = None
    mapper = {k: k._rebuild(body=List(body=v)) if v else v
              for k, v in mapper.items()}
    iet = Transformer(mapper).visit(iet)

    return iet, {}
def _optimize_halospots(self, iet):
    """
    Optimize the HaloSpots in ``iet``.

        * Remove all USELESS HaloSpots;
        * Merge all hoistable HaloSpots with their root HaloSpot, thus removing
          redundant communications and anticipating communications that will be
          required by later Iterations.
    """
    # Drop USELESS HaloSpots
    mapper = {hs: hs.body for hs in FindNodes(HaloSpot).visit(iet) if hs.is_Useless}
    iet = Transformer(mapper, nested=True).visit(iet)

    # Handle `hoistable` HaloSpots
    mapper = {}
    for halo_spots in MapNodes(Iteration, HaloSpot).visit(iet).values():
        root = halo_spots[0]
        halo_schemes = [hs.halo_scheme.project(hs.hoistable) for hs in halo_spots[1:]]
        mapper[root] = root._rebuild(halo_scheme=root.halo_scheme.union(halo_schemes))
        mapper.update({hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(hs.hoistable))
                       for hs in halo_spots[1:]})
    iet = Transformer(mapper, nested=True).visit(iet)

    # At this point, some HaloSpots may have become empty (i.e., requiring
    # no communications), hence they can be removed
    #
    # <HaloSpot(u,v)>           <HaloSpot(u,v)>
    #   <A>                       <A>
    # <HaloSpot()>       ---->  <B>
    #   <B>
    mapper = {i: i.body for i in FindNodes(HaloSpot).visit(iet) if i.is_empty}
    iet = Transformer(mapper, nested=True).visit(iet)

    # Finally, we try to move HaloSpot-free Iteration nests within HaloSpot
    # subtrees, to overlap as much computation as possible. The HaloSpot-free
    # Iteration nests must be fully affine, otherwise we wouldn't be able to
    # honour the data dependences along the halo
    #
    # <HaloSpot(u,v)>            <HaloSpot(u,v)>
    #   <A>              ---->     <A>
    # <B>               affine?    <B>
    #
    # Here, <B> doesn't require any halo exchange, but it might still need the
    # output of <A>; thus, if we do computation/communication overlap over <A>
    # *and* want to embed <B> within the HaloSpot, then <B>'s iteration space
    # will have to be split as well. For this, <B> must be affine.
    mapper = {}
    for v in FindAdjacent((HaloSpot, Iteration)).visit(iet).values():
        for g in v:
            root = None
            for i in g:
                if i.is_HaloSpot:
                    root = i
                    mapper[root] = [root.body]
                elif root and all(j.is_Affine for j in FindNodes(Iteration).visit(i)):
                    mapper[root].append(i)
                    mapper[i] = None
                else:
                    root = None
    mapper = {k: k._rebuild(body=List(body=v)) if v else v
              for k, v in mapper.items()}
    iet = Transformer(mapper).visit(iet)

    return iet, {}
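
# A toy version of the final step shared by both `_optimize_halospots` variants
# above: scan groups of adjacent nodes and fold any affine, HaloSpot-free
# Iteration nest into the body of the HaloSpot immediately preceding it.
# `ToyHalo`/`ToyNest` are hypothetical stand-ins for the IET node classes.
class ToyHalo:

    def __init__(self, body):
        self.body = list(body)


class ToyNest:

    def __init__(self, name, affine):
        self.name = name
        self.affine = affine


def embed_adjacent(group):
    out, root = [], None
    for node in group:
        if isinstance(node, ToyHalo):
            root = node
            out.append(node)
        elif root is not None and node.affine:
            root.body.append(node)  # fold <B> into the preceding HaloSpot
        else:
            root = None
            out.append(node)
    return out


group = [ToyHalo(['<A>']), ToyNest('<B>', affine=True), ToyNest('<C>', affine=False)]
top = embed_adjacent(group)
print(len(top), [n.name for n in top[0].body[1:]])  # 2 ['<B>']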
def _hoist_halospots(iet):
    """
    Hoist HaloSpots from inner to outer Iterations where all data dependencies
    would be honored.
    """
    # Hoisting rules -- if the retval is True, then the input `dep` is not a
    # stopper to halo hoisting

    def rule0(dep, candidates, loc_dims):
        # E.g., `dep=W<f,[x]> -> R<f,[x-1]>` and `candidates=({time}, {x})` => False
        # E.g., `dep=W<f,[t1, x, y]> -> R<f,[t0, x-1, y+1]>`, `dep.cause={t,time}`
        # and `candidates=({x},)` => True
        return (all(i & set(dep.distance_mapper) for i in candidates) and
                not any(i & dep.cause for i in candidates) and
                not any(i & loc_dims for i in candidates))

    def rule1(dep, candidates, loc_dims):
        # An increment isn't a stopper to hoisting
        return dep.write.is_increment

    hoist_rules = [rule0, rule1]

    # Precompute scopes to save time
    scopes = {i: Scope([e.expr for e in v]) for i, v in MapNodes().visit(iet).items()}

    # Analysis
    hsmapper = {}
    imapper = defaultdict(list)
    for iters, halo_spots in MapNodes(Iteration, HaloSpot, 'groupby').visit(iet).items():
        for hs in halo_spots:
            hsmapper[hs] = hs.halo_scheme

            for f, (loc_indices, _) in hs.fmapper.items():
                loc_dims = frozenset().union([q for d in loc_indices
                                              for q in d._defines])

                for n, i in enumerate(iters):
                    candidates = [j.dim._defines for j in iters[n:]]

                    test = True
                    for dep in scopes[i].d_flow.project(f):
                        if any(rule(dep, candidates, loc_dims) for rule in hoist_rules):
                            continue
                        test = False
                        break
                    if test:
                        hsmapper[hs] = hsmapper[hs].drop(f)
                        imapper[i].append(hs.halo_scheme.project(f))
                        break

    # Post-process analysis
    mapper = {i: HaloSpot(HaloScheme.union(hss), i._rebuild())
              for i, hss in imapper.items()}
    mapper.update({i: i.body if hs.is_void else i._rebuild(halo_scheme=hs)
                   for i, hs in hsmapper.items()})

    # Transform the IET hoisting/dropping HaloSpots according to the analysis
    iet = Transformer(mapper, nested=True).visit(iet)

    # Clean up: de-nest HaloSpots if necessary
    mapper = {}
    for hs in FindNodes(HaloSpot).visit(iet):
        if hs.body.is_HaloSpot:
            halo_scheme = HaloScheme.union([hs.halo_scheme, hs.body.halo_scheme])
            mapper[hs] = hs._rebuild(halo_scheme=halo_scheme, body=hs.body.body)
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet
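
# The rule-list dispatch above in miniature: a dependence is not a stopper to
# hoisting if ANY rule clears it. Dicts stand in for the dependence objects;
# only the dispatch pattern and the two rules' shapes are being illustrated.
def toy_rule0(dep, candidates):
    # Mirrors rule0's core test: no candidate Dimension causes the dependence
    return not any(c & dep['cause'] for c in candidates)


def toy_rule1(dep, candidates):
    # Mirrors rule1: an increment isn't a stopper to hoisting
    return dep['is_increment']


toy_rules = [toy_rule0, toy_rule1]


def clears(dep, candidates):
    return any(rule(dep, candidates) for rule in toy_rules)


dep = {'cause': {'x'}, 'is_increment': False}
print(clears(dep, candidates=[{'time'}]))  # True: `x` is not crossed
print(clears(dep, candidates=[{'x'}]))     # False: a genuine stopper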
def _merge_halospots(iet):
    """
    Merge HaloSpots on the same Iteration tree level where all data dependencies
    would be honored.
    """
    # Merge rules -- if the retval is True, then the input `dep` is not a
    # stopper to halo merging

    def rule0(dep, hs, loc_indices):
        # E.g., `dep=W<f,[t1, x]> -> R<f,[t0, x-1]>` => True
        return not any(d in hs.dimensions or dep.distance_mapper[d] is S.Infinity
                       for d in dep.cause)

    def rule1(dep, hs, loc_indices):
        # TODO: This is apparently never hit, but we're wary of removing it
        return (dep.is_regular and
                all(not any(dep.read.touched_halo(d.root)) for d in dep.cause))

    def rule2(dep, hs, loc_indices):
        # E.g., `dep=W<f,[t1, x+1]> -> R<f,[t1, xl+1]>` and `loc_indices={t: t0}`
        # => True
        return any(dep.distance_mapper[d] == 0 and dep.source[d] is not v
                   for d, v in loc_indices.items())

    merge_rules = [rule0, rule1, rule2]

    # Analysis
    mapper = {}
    for i, halo_spots in MapNodes(Iteration, HaloSpot, 'immediate').visit(iet).items():
        if i is None or len(halo_spots) <= 1:
            continue

        scope = Scope([e.expr for e in FindNodes(Expression).visit(i)])

        hs0 = halo_spots[0]
        mapper[hs0] = hs0.halo_scheme

        for hs in halo_spots[1:]:
            mapper[hs] = hs.halo_scheme

            for f, (loc_indices, _) in hs.fmapper.items():
                test = True
                for dep in scope.d_flow.project(f):
                    if any(rule(dep, hs, loc_indices) for rule in merge_rules):
                        continue
                    test = False
                    break
                if test:
                    try:
                        mapper[hs0] = HaloScheme.union([mapper[hs0],
                                                        hs.halo_scheme.project(f)])
                        mapper[hs] = mapper[hs].drop(f)
                    except ValueError:
                        # Non-mergeable HaloSchemes, e.g. `hs.loc_indices={t: t1}`
                        # while `hs0.loc_indices={t: t0}`
                        pass

    # Post-process analysis
    mapper = {i: i.body if hs.is_void else i._rebuild(halo_scheme=hs)
              for i, hs in mapper.items()}

    # Transform the IET merging/dropping HaloSpots according to the analysis
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet
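
# rule0 above with plain data: a dependence does not block merging if none of
# its causing Dimensions belongs to the HaloSpot and the dependence distance
# along each cause is finite. `math.inf` stands in for sympy's S.Infinity;
# everything here is a hypothetical sketch, not Devito API.
from math import inf


def toy_merge_rule0(dep, hs_dimensions):
    return not any(d in hs_dimensions or dep['distance'][d] == inf
                   for d in dep['cause'])


dep = {'cause': {'time'}, 'distance': {'time': 1}}
print(toy_merge_rule0(dep, hs_dimensions={'x', 'y'}))     # True: safe to merge
print(toy_merge_rule0(dep, hs_dimensions={'time', 'x'}))  # False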