Exemplo n.º 1
def _drop_halospots(iet):
    """
    Remove HaloSpots that:

        * Embed SEQUENTIAL Iterations
        * Would be used to compute Increments (in which case, a halo exchange
          is actually unnecessary)

    Parameters
    ----------
    iet
        The tree visited by ``MapNodes``, ``FindNodes`` and ``Transformer``.

    Returns
    -------
    The transformed tree, in which every HaloSpot has been rebuilt with a halo
    scheme stripped of the Functions collected by the analysis below.
    """
    # HaloSpot -> set of Functions to be dropped from its halo scheme
    mapper = defaultdict(set)

    # If a HaloSpot Dimension turns out to be SEQUENTIAL, then the HaloSpot is useless
    for hs, iterations in MapNodes(HaloSpot, Iteration).visit(iet).items():
        if any(i.is_Sequential for i in iterations
               if i.dim.root in hs.dimensions):
            # The whole HaloSpot is useless, so all of its Functions are dropped
            mapper[hs].update(hs.functions)

    # If all HaloSpot reads pertain to increments, then the HaloSpot is useless
    for hs, expressions in MapNodes(HaloSpot, Expression).visit(iet).items():
        # The Scope depends on `expressions` only, so it is built once per
        # HaloSpot rather than once per Function
        scope = Scope([i.expr for i in expressions])
        for f in hs.fmapper:
            if all(i.is_increment for i in scope.reads.get(f, [])):
                mapper[hs].add(f)

    # Transform the IET introducing the "reduced" HaloSpots
    subs = {
        hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(mapper[hs]))
        for hs in FindNodes(HaloSpot).visit(iet)
    }
    iet = Transformer(subs, nested=True).visit(iet)

    return iet
Exemplo n.º 2
def mark_halospot_useless(analysis):
    """
    Update ``analysis`` detecting the ``useless`` HaloSpots within ``analysis.iet``.

    A HaloSpot is associated with ``useless(...)`` over:

        * all of its Functions, if any of its Dimensions is iterated by a
          SEQUENTIAL Iteration;
        * otherwise, the subset of its Functions that are either never written
          within the enclosing tree's scope or only read by increments.
    """
    properties = OrderedDict()

    # If a HaloSpot Dimension turns out to be SEQUENTIAL, then the HaloSpot is useless
    for hs, iterations in MapNodes(HaloSpot, Iteration).visit(analysis.iet).items():
        if any(SEQUENTIAL in analysis.properties[i]
               for i in iterations if i.dim.root in hs.dimensions):
            properties[hs] = useless(hs.functions)

    # If a Function is never written to, or if all HaloSpot reads pertain to an increment
    # expression, then the HaloSpot is useless
    for tree in analysis.trees:
        scope = analysis.scopes[tree.root]

        for hs, v in MapNodes(HaloSpot).visit(tree.root).items():
            if hs in properties:
                # Already classified, either above or while visiting another tree
                continue

            found = []
            for f in hs.fmapper:
                test0 = not scope.writes.get(f)
                test1 = (all(i.is_Expression for i in v) and
                         all(r.is_increment for r in Scope([i.expr for i in v]).reads[f]))
                if test0 or test1:
                    found.append(f)

            if found:
                properties[hs] = useless(tuple(found))

    # NOTE(review): nothing visible in this excerpt stores `properties` back
    # into `analysis`; the statement doing so appears to have been lost and
    # must be restored from the upstream source.

Exemplo n.º 3
def _hoist_halospots(iet):
    # NOTE(review): the next two lines read as the function's docstring, but the
    # enclosing triple quotes are absent from this excerpt.
    Hoist HaloSpots from inner to outer Iterations where all data dependencies
    would be honored.
    # Precompute scopes to save time
    scopes = {
        i: Scope([e.expr for e in v])
        for i, v in MapNodes().visit(iet).items()
    # NOTE(review): the closing brace of the comprehension above is not visible.

    # Analysis
    # `hsmapper`: HaloSpot -> halo scheme left after dropping the hoisted Functions
    hsmapper = {}
    # NOTE(review): `imapper` is consumed in the post-processing step below, but
    # no visible statement ever appends to it -- presumably elided; confirm.
    imapper = defaultdict(list)
    for iters, halo_spots in MapNodes(Iteration, HaloSpot,
        # NOTE(review): the `MapNodes(...)` call above is cut off; its remaining
        # argument(s) and the `.visit(...).items():` tail cannot be recovered
        # from this excerpt.
        for hs in halo_spots:
            hsmapper[hs] = hs.halo_scheme

            for f in hs.fmapper:
                for n, i in enumerate(iters):
                    # Dimensions defined by `i` and by all Iterations after it
                    # in `iters`
                    maybe_hoistable = set().union(
                        *[i.dim._defines for i in iters[n:]])
                    d_flow = scopes[i].d_flow.project(f)

                    # `f` is dropped from `hs` if every flow dependence on it is
                    # either not caused by a candidate Dimension or is an increment
                    if all(not (dep.cause
                                & maybe_hoistable) or dep.write.is_increment
                           for dep in d_flow):
                        hsmapper[hs] = hsmapper[hs].drop(f)

    # Post-process analysis
    # NOTE(review): two comprehension bodies appear fused below; the text that
    # separated them (and the closing braces) is missing.
    mapper = {
        i: HaloSpot(HaloScheme.union(hss), i._rebuild())
        for i, hss in imapper.items()
        i: i.body if hs.is_void else i._rebuild(halo_scheme=hs)
        for i, hs in hsmapper.items()

    # Transform the IET hoisting/dropping HaloSpots as according to the analysis
    iet = Transformer(mapper, nested=True).visit(iet)

    # Clean up: de-nest HaloSpots if necessary
    mapper = {}
    for hs in FindNodes(HaloSpot).visit(iet):
        if hs.body.is_HaloSpot:
            # Fuse the halo schemes of the outer and the directly nested HaloSpot
            halo_scheme = HaloScheme.union(
                [hs.halo_scheme, hs.body.halo_scheme])
            # NOTE(review): the `_rebuild(...)` call below is truncated after its
            # first keyword argument.
            mapper[hs] = hs._rebuild(halo_scheme=halo_scheme,
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet
Exemplo n.º 4
def track_subsections(iet, **kwargs):
    """
    Add custom Sections to the `profiler`. Custom Sections include:

        * MPI Calls (e.g., HaloUpdateCall and HaloUpdateWait)
        * Busy-waiting on While(lock) (e.g., from host-device orchestration)

    Parameters
    ----------
    iet
        The tree to be instrumented.
    **kwargs
        Must provide ``profiler`` (exposing ``trackable_subsections`` and
        ``track_subsection``) and ``sregistry`` (exposing ``make_name``).

    Returns
    -------
    A 2-tuple ``(iet, {})``: the transformed tree and an empty dict.
    """
    profiler = kwargs['profiler']
    sregistry = kwargs['sregistry']

    # Node class -> prefix of the generated subsection name
    name_mapper = {
        HaloUpdateCall: 'haloupdate',
        HaloWaitCall: 'halowait',
        RemainderCall: 'remainder',
        HaloUpdateList: 'haloupdate',
        HaloWaitList: 'halowait',
        BusyWait: 'busywait'
    }

    mapper = {}

    for NodeType in [MPIList, MPICall, BusyWait]:
        for k, v in MapNodes(Section, NodeType).visit(iet).items():
            for i in v:
                # Skip nodes already wrapped, as well as nodes that the profiler
                # does not track
                if i in mapper or not any(
                        isinstance(i, n)
                        for n in profiler.trackable_subsections):
                    continue
                name = sregistry.make_name(prefix=name_mapper[i.__class__])
                mapper[i] = Section(name, body=i, is_subsection=True)
                profiler.track_subsection(k.name, name)

    iet = Transformer(mapper).visit(iet)

    return iet, {}
Exemplo n.º 5
    def __init__(self, iet):
        """
        Initialize the analysis state for ``iet``.

        Stores the tree itself, an empty ordered mapping of properties, the
        iteration trees retrieved in ``'superset'`` mode, and one ``Scope`` per
        entry returned by the default ``MapNodes`` visit.
        """
        self.iet = iet
        self.properties = OrderedDict()

        self.trees = retrieve_iteration_tree(iet, mode='superset')

        # Each Scope is built from the `.expr` of the nodes mapped to the key
        scopes = OrderedDict()
        for node, children in MapNodes().visit(iet).items():
            scopes[node] = Scope([child.expr for child in children])
        self.scopes = scopes
Exemplo n.º 6
def mark_halospot_useless(analysis):
    # NOTE(review): the next line reads as the function's docstring, but its
    # triple-quote delimiters are absent from this excerpt.
    Update the ``analysis`` detecting the USELESS HaloSpots within ``analysis.iet``.
    # HaloSpot -> USELESS, for every HaloSpot matching one of the criteria below
    properties = OrderedDict()
    for hs, iterations in MapNodes(HaloSpot,
        # NOTE(review): the `MapNodes(...)` call above is cut off; its remaining
        # argument(s) and the `.visit(...).items():` tail are not visible.
        # `hs` is USELESS if ...

        # * ANY of its Dimensions turn out to be SEQUENTIAL
        if any(SEQUENTIAL in analysis.properties[i] for i in iterations
               if i.dim.root in hs.dimensions):
            properties[hs] = USELESS

        # * ALL reads pertain to an increment expression
        # `test` turns True as soon as a non-increment read of any `f` is found
        test = False
        scope = analysis.scopes[iterations[0]]
        for f in hs.fmapper:
            if any(not r.is_increment for r in scope.reads[f]):
                test = True
        if not test:
            properties[hs] = USELESS
    # NOTE(review): nothing visible stores `properties` back into `analysis`;
    # the statement doing so appears to be missing from this excerpt.

Exemplo n.º 7
def mark_halospot_overlappable(analysis):
    """
    Update ``analysis`` detecting the OVERLAPPABLE HaloSpots within ``analysis.iet``.

    A HaloSpot is OVERLAPPABLE when every Iteration mapped to it carries the
    PARALLEL property.
    """
    properties = OrderedDict()
    for hs, iterations in MapNodes(HaloSpot, Iteration).visit(analysis.iet).items():
        # To be OVERLAPPABLE, all inner Iterations must be PARALLEL. An Iteration
        # without any recorded property cannot be proven PARALLEL: the empty
        # default makes the membership test False instead of raising a
        # TypeError on `PARALLEL in None`
        if all(PARALLEL in analysis.properties.get(i, ()) for i in iterations):
            properties[hs] = OVERLAPPABLE

    # NOTE(review): nothing visible in this excerpt stores `properties` back
    # into `analysis`; the statement doing so appears to have been lost and
    # must be restored from the upstream source.

Exemplo n.º 8
def _merge_halospots(iet):
    # NOTE(review): the next two lines read as the function's docstring, but the
    # enclosing triple quotes are absent from this excerpt.
    Merge HaloSpots on the same Iteration tree level where all data dependencies
    would be honored.
    # Analysis
    # HaloSpot -> halo scheme it should carry after merging
    mapper = {}
    for i, halo_spots in MapNodes(Iteration, HaloSpot,
        # NOTE(review): the `MapNodes(...)` call above is cut off; its remaining
        # argument(s) and the `.visit(...).items():` tail are not visible.
        if i is None or len(halo_spots) <= 1:
            # NOTE(review): this `if` has no body here -- presumably a `continue`
            # (nothing to merge) was lost; confirm against upstream.

        scope = Scope([e.expr for e in FindNodes(Expression).visit(i)])

        # The first HaloSpot is the merge target for all the following ones
        hs0 = halo_spots[0]
        mapper[hs0] = hs0.halo_scheme

        for hs in halo_spots[1:]:
            mapper[hs] = hs.halo_scheme

            for f in hs.fmapper:
                # `test` stays True only if no flow dependence on `f` prevents
                # moving its halo exchange into `hs0`
                test = True
                for dep in scope.d_flow.project(f):
                    # NOTE(review): both `if` statements below have no body
                    # here, so as shown `test = False` runs for every `dep`;
                    # presumably skip statements were lost.
                    if not (dep.cause & set(hs.dimensions)):
                    if dep.is_regular and all(
                            not any(dep.read.touched_halo(c.root))
                            for c in dep.cause):
                    test = False
                if test:
                    # Move `f`'s exchange from `hs` into `hs0`
                    mapper[hs0] = HaloScheme.union(
                        [mapper[hs0], hs.halo_scheme.project(f)])
                    mapper[hs] = mapper[hs].drop(f)

    # Post-process analysis
    # A HaloSpot left with a void halo scheme is replaced by its own body
    mapper = {
        i: i.body if hs.is_void else i._rebuild(halo_scheme=hs)
        for i, hs in mapper.items()
    # NOTE(review): the closing brace of the comprehension above is not visible.

    # Transform the IET merging/dropping HaloSpots as according to the analysis
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet
Exemplo n.º 9
def mark_halospot_hoistable(analysis):
    # NOTE(review): the next line reads as the function's docstring, but its
    # triple-quote delimiters are absent from this excerpt.
    Update ``analysis`` detecting the ``hoistable`` HaloSpots within ``analysis.iet``.
    # HaloSpot -> `hoistable(...)` over the Functions found to be hoistable
    properties = OrderedDict()
    for i, halo_spots in MapNodes(Iteration,
        # NOTE(review): the `MapNodes(...)` call above is cut off; its remaining
        # argument(s) and the `.visit(...).items():` tail are not visible.
        for hs in halo_spots:
            if hs in properties:
                # Already went through this HaloSpot
                # NOTE(review): no statement follows this comment -- presumably
                # a `continue` was lost.

            found = []
            scope = analysis.scopes[i]
            for f, hse in hs.fmapper.items():
                # The sufficient condition for `f`'s halo-update to be
                # `hoistable` is that there are no `hs.dimensions`-induced
                # flow-dependences touching the halo
                test = True
                for dep in scope.d_flow.project(f):
                    # NOTE(review): each `if test:` below has no body here;
                    # presumably each skipped to the next `dep`.
                    test = not (dep.cause & set(hs.dimensions))
                    if test:

                    test = dep.write.is_increment
                    if test:

                    test = all(not any(dep.read.touched_halo(c.root))
                               for c in dep.cause)
                    if test:

                    # `dep` is indeed a flow-dependence touching the halo of distributed
                    # Dimension, so we must assume it's non-hoistable
                    # NOTE(review): no statement follows this comment --
                    # presumably the loop was exited here.

                if test:
                    # NOTE(review): empty body; `found` is never appended to
                    # in the visible code, although it is read below.

            if found:
                properties[hs] = hoistable(tuple(found))
    # NOTE(review): nothing visible stores `properties` back into `analysis`;
    # the statement doing so appears to be missing from this excerpt.

Exemplo n.º 10
def mark_halospot_hoistable(analysis):
    # NOTE(review): the next line reads as the function's docstring, but its
    # triple-quote delimiters are absent from this excerpt.
    Update the ``analysis`` detecting the HOISTABLE HaloSpots within ``analysis.iet``.
    # HaloSpot -> `hoistable(...)` over the Functions found to be hoistable
    properties = OrderedDict()
    for i, halo_spots in MapNodes(Iteration,
        # NOTE(review): the `MapNodes(...)` call above is cut off; its remaining
        # argument(s) and the `.visit(...).items():` tail are not visible.
        for hs in halo_spots:
            if hs in properties:
                # Already went through this HaloSpot, let's save some analysis time
                # NOTE(review): no statement follows this comment -- presumably
                # a `continue` was lost.
            # A sufficient condition to be `hoistable` is that, for a given Function,
            # there are no anti-dependences in the entire scope.
            # TODO: This condition can actually be relaxed, by considering smaller
            # sections of the scope
            found = [
                f for f in hs.fmapper
                if not analysis.scopes[i].d_anti.project(f)
            # NOTE(review): the closing bracket of the list above is not visible.
            if found:
                properties[hs] = hoistable(tuple(found))
    # NOTE(review): nothing visible stores `properties` back into `analysis`;
    # the statement doing so appears to be missing from this excerpt.

Exemplo n.º 11
    def _optimize_halospots(self, iet):
        # NOTE(review): the lines up to "# Drop `useless` HaloSpots" read as the
        # method's docstring, but the enclosing triple quotes are absent from
        # this excerpt.
        Optimize the HaloSpots in ``iet``.

        * Remove all ``useless`` HaloSpots;
        * Merge all ``hoistable`` HaloSpots with their root HaloSpot, thus
          removing redundant communications and anticipating communications
          that will be required by later Iterations.
        # Drop `useless` HaloSpots
        mapper = {
            hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(hs.useless))
            for hs in FindNodes(HaloSpot).visit(iet)
        # NOTE(review): the closing brace of the comprehension above is not visible.
        iet = Transformer(mapper, nested=True).visit(iet)

        # Handle `hoistable` HaloSpots
        # First, we merge `hoistable` HaloSpots together, to anticipate communications
        mapper = {}
        for tree in retrieve_iteration_tree(iet):
            halo_spots = FindNodes(HaloSpot).visit(tree.root)
            # NOTE(review): the two `if` statements below have no body here --
            # presumably each skipped to the next tree.
            if not halo_spots:
            root = halo_spots[0]
            if root in mapper:
            hss = [root.halo_scheme]
            # NOTE(review): the statement opening the call that takes the
            # generator below, and the `try:` matching the `except ValueError:`
            # further down, are not visible.
                hs.halo_scheme.project(hs.hoistable) for hs in halo_spots[1:]
                mapper[root] = root._rebuild(halo_scheme=HaloScheme.union(hss))
            except ValueError:
                # HaloSpots have non-matching `loc_indices` and therefore can't be merged
                warning("Found hoistable HaloSpots with disjoint loc_indices, "
                        "skipping optimization")
            for hs in halo_spots[1:]:
                # What is left of `hs` once its hoistable part has been removed
                halo_scheme = hs.halo_scheme.drop(hs.hoistable)
                if halo_scheme.is_void:
                    mapper[hs] = hs.body
                    # NOTE(review): as shown, the next assignment overwrites the
                    # previous one; presumably an `else:` was lost.
                    mapper[hs] = hs._rebuild(halo_scheme=halo_scheme)
        iet = Transformer(mapper, nested=True).visit(iet)

        # Then, we make sure the halo exchanges get performed *before*
        # the first distributed Dimension. Again, we do this to anticipate
        # communications, which hopefully has a pay off in performance
        # <Iteration x>                    <HaloSpot(u)>, in y
        #   <HaloSpot(u)>, in y    ---->   <Iteration x>
        #   <Iteration y>                    <Iteration y>
        mapper = {}
        for i, halo_spots in MapNodes(Iteration, HaloSpot).visit(iet).items():
            hoistable = [hs for hs in halo_spots if hs.hoistable]
            # NOTE(review): the `if` below has no body here, and the call that
            # takes the string literal under `elif` is missing.
            if not hoistable:
            elif len(hoistable) > 1:
                # We should never end up here, but for now we can't prove it formally
                    "Found multiple hoistable HaloSpots, skipping optimization"
            hs = hoistable.pop()
            # NOTE(review): the `if` below has no body here.
            if hs in mapper:
            if i.dim.root in hs.dimensions:
                halo_scheme = hs.halo_scheme.drop(hs.hoistable)
                if halo_scheme.is_void:
                    mapper[hs] = hs.body
                    # NOTE(review): as shown, the next assignment overwrites the
                    # previous one; presumably an `else:` was lost.
                    mapper[hs] = hs._rebuild(halo_scheme=halo_scheme)

                halo_scheme = hs.halo_scheme.project(hs.hoistable)
                # NOTE(review): the `_rebuild(...)` call below is truncated after
                # its first keyword argument.
                mapper[i] = hs._rebuild(halo_scheme=halo_scheme,
        iet = Transformer(mapper, nested=True).visit(iet)

        # Finally, we try to move HaloSpot-free Iteration nests within HaloSpot
        # subtrees, to overlap as much computation as possible. The HaloSpot-free
        # Iteration nests must be fully affine, otherwise we wouldn't be able to
        # honour the data dependences along the halo
        # <HaloSpot(u,v)>            HaloSpot(u,v)
        #   <A>             ---->      <A>
        # <B>              affine?     <B>
        # Here, <B> doesn't require any halo exchange, but it might still need the
        # output of <A>; thus, if we do computation/communication overlap over <A>
        # *and* want to embed <B> within the HaloSpot, then <B>'s iteration space
        # will have to be split as well. For this, <B> must be affine.
        mapper = {}
        for v in FindAdjacent((HaloSpot, Iteration)).visit(iet).values():
            for g in v:
                root = None
                for i in g:
                    if i.is_HaloSpot:
                        root = i
                        mapper[root] = [root.body]
                    elif root and all(j.is_Affine
                                      for j in FindNodes(Iteration).visit(i)):
                        # NOTE(review): as shown, `i` is mapped to None but never
                        # appended to `mapper[root]`; a statement doing so (and
                        # possibly an `else:` before `root = None`) looks lost.
                        mapper[i] = None
                        root = None
        mapper = {
            k: k._rebuild(body=List(body=v)) if v else v
            for k, v in mapper.items()
        # NOTE(review): the closing brace of the comprehension above is not visible.
        iet = Transformer(mapper).visit(iet)

        return iet, {}
Exemplo n.º 12
    def _optimize_halospots(self, iet):
        """
        Optimize the HaloSpots in ``iet``.

        * Remove all USELESS HaloSpots;
        * Merge all hoistable HaloSpots with their root HaloSpot, thus
          removing redundant communications and anticipating communications
          that will be required by later Iterations.

        Returns
        -------
        A 2-tuple ``(iet, {})``: the transformed tree and an empty dict.
        """
        # Drop USELESS HaloSpots
        mapper = {hs: hs.body for hs in FindNodes(HaloSpot).visit(iet) if hs.is_Useless}
        iet = Transformer(mapper, nested=True).visit(iet)

        # Handle `hoistable` HaloSpots
        mapper = {}
        for halo_spots in MapNodes(Iteration, HaloSpot).visit(iet).values():
            # The first HaloSpot absorbs the hoistable part of all the others
            root = halo_spots[0]
            halo_schemes = [hs.halo_scheme.project(hs.hoistable) for hs in halo_spots[1:]]
            mapper[root] = root._rebuild(halo_scheme=root.halo_scheme.union(halo_schemes))
            mapper.update({hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(hs.hoistable))
                           for hs in halo_spots[1:]})
        iet = Transformer(mapper, nested=True).visit(iet)

        # At this point, some HaloSpots may have become empty (i.e., requiring
        # no communications), hence they can be removed
        # <HaloSpot(u,v)>           HaloSpot(u,v)
        #   <A>                       <A>
        # <HaloSpot()>      ---->   <B>
        #   <B>
        mapper = {i: i.body for i in FindNodes(HaloSpot).visit(iet) if i.is_empty}
        iet = Transformer(mapper, nested=True).visit(iet)

        # Finally, we try to move HaloSpot-free Iteration nests within HaloSpot
        # subtrees, to overlap as much computation as possible. The HaloSpot-free
        # Iteration nests must be fully affine, otherwise we wouldn't be able to
        # honour the data dependences along the halo
        # <HaloSpot(u,v)>            HaloSpot(u,v)
        #   <A>             ---->      <A>
        # <B>              affine?     <B>
        # Here, <B> doesn't require any halo exchange, but it might still need the
        # output of <A>; thus, if we do computation/communication overlap over <A>
        # *and* want to embed <B> within the HaloSpot, then <B>'s iteration space
        # will have to be split as well. For this, <B> must be affine.
        mapper = {}
        for v in FindAdjacent((HaloSpot, Iteration)).visit(iet).values():
            for g in v:
                root = None
                for i in g:
                    if i.is_HaloSpot:
                        root = i
                        mapper[root] = [root.body]
                    elif root and all(j.is_Affine for j in FindNodes(Iteration).visit(i)):
                        # Re-attach `i` under the HaloSpot before removing it from
                        # its current position; mapping it to None alone would
                        # delete the nest from the tree
                        mapper[root].append(i)
                        mapper[i] = None
                    else:
                        # A node that cannot be embedded ends the run: anything
                        # after it must not be moved ahead of it
                        root = None
        mapper = {k: k._rebuild(body=List(body=v)) if v else v for k, v in mapper.items()}
        iet = Transformer(mapper).visit(iet)

        return iet, {}
Exemplo n.º 13
def _hoist_halospots(iet):
    # NOTE(review): the next two lines read as the function's docstring, but the
    # enclosing triple quotes are absent from this excerpt.
    Hoist HaloSpots from inner to outer Iterations where all data dependencies
    would be honored.

    # Hoisting rules -- if the retval is True, then it means the input `dep` is not
    # a stopper to halo hoisting

    def rule0(dep, candidates, loc_dims):
        # E.g., `dep=W<f,[x]> -> R<f,[x-1]>` and `candidates=({time}, {x})` => False
        # E.g., `dep=W<f,[t1, x, y]> -> R<f,[t0, x-1, y+1]>`, `dep.cause={t,time}` and
        #       `candidates=({x},)` => True
        return (all(i & set(dep.distance_mapper) for i in candidates)
                and not any(i & dep.cause for i in candidates)
                and not any(i & loc_dims for i in candidates))

    def rule1(dep, candidates, loc_dims):
        # An increment isn't a stopper to hoisting
        return dep.write.is_increment

    hoist_rules = [rule0, rule1]

    # Precompute scopes to save time
    scopes = {
        i: Scope([e.expr for e in v])
        for i, v in MapNodes().visit(iet).items()
    # NOTE(review): the closing brace of the comprehension above is not visible.

    # Analysis
    # `hsmapper`: HaloSpot -> halo scheme left after dropping the hoisted Functions
    hsmapper = {}
    # NOTE(review): `imapper` is consumed in the post-processing step below, but
    # no visible statement ever appends to it -- presumably elided; confirm.
    imapper = defaultdict(list)
    for iters, halo_spots in MapNodes(Iteration, HaloSpot,
        # NOTE(review): the `MapNodes(...)` call above is cut off; its remaining
        # argument(s) and the `.visit(...).items():` tail are not visible.
        for hs in halo_spots:
            hsmapper[hs] = hs.halo_scheme

            for f, (loc_indices, _) in hs.fmapper.items():
                # All Dimensions defined by the keys of `loc_indices`
                loc_dims = frozenset().union(
                    [q for d in loc_indices for q in d._defines])

                for n, i in enumerate(iters):
                    # One set of Dimensions per Iteration from `i` onwards
                    candidates = [i.dim._defines for i in iters[n:]]

                    test = True
                    for dep in scopes[i].d_flow.project(f):
                        # NOTE(review): the `if` below has no body here, so as
                        # shown `test = False` runs for every `dep`; presumably
                        # a skip statement was lost.
                        if any(
                                rule(dep, candidates, loc_dims)
                                for rule in hoist_rules):
                        test = False
                    if test:
                        hsmapper[hs] = hsmapper[hs].drop(f)

    # Post-process analysis
    # NOTE(review): two comprehension bodies appear fused below; the text that
    # separated them (and the closing braces) is missing.
    mapper = {
        i: HaloSpot(HaloScheme.union(hss), i._rebuild())
        for i, hss in imapper.items()
        i: i.body if hs.is_void else i._rebuild(halo_scheme=hs)
        for i, hs in hsmapper.items()

    # Transform the IET hoisting/dropping HaloSpots as according to the analysis
    iet = Transformer(mapper, nested=True).visit(iet)

    # Clean up: de-nest HaloSpots if necessary
    mapper = {}
    for hs in FindNodes(HaloSpot).visit(iet):
        if hs.body.is_HaloSpot:
            # Fuse the halo schemes of the outer and the directly nested HaloSpot
            halo_scheme = HaloScheme.union(
                [hs.halo_scheme, hs.body.halo_scheme])
            # NOTE(review): the `_rebuild(...)` call below is truncated after its
            # first keyword argument.
            mapper[hs] = hs._rebuild(halo_scheme=halo_scheme,
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet
Exemplo n.º 14
def _merge_halospots(iet):
    # NOTE(review): the next two lines read as the function's docstring, but the
    # enclosing triple quotes are absent from this excerpt.
    Merge HaloSpots on the same Iteration tree level where all data dependencies
    would be honored.

    # Merge rules -- if the retval is True, then it means the input `dep` is not
    # a stopper to halo merging

    def rule0(dep, hs, loc_indices):
        # E.g., `dep=W<f,[t1, x]> -> R<f,[t0, x-1]>` => True
        return not any(
            d in hs.dimensions or dep.distance_mapper[d] is S.Infinity
            for d in dep.cause)

    def rule1(dep, hs, loc_indices):
        # TODO This is apparently never hit, but feeling uncomfortable to remove it
        return dep.is_regular and all(not any(dep.read.touched_halo(d.root))
                                      for d in dep.cause)

    def rule2(dep, hs, loc_indices):
        # E.g., `dep=W<f,[t1, x+1]> -> R<f,[t1, xl+1]>` and `loc_indices={t: t0}` => True
        return any(dep.distance_mapper[d] == 0 and dep.source[d] is not v
                   for d, v in loc_indices.items())

    merge_rules = [rule0, rule1, rule2]

    # Analysis
    # HaloSpot -> halo scheme it should carry after merging
    mapper = {}
    for i, halo_spots in MapNodes(Iteration, HaloSpot,
        # NOTE(review): the `MapNodes(...)` call above is cut off; its remaining
        # argument(s) and the `.visit(...).items():` tail are not visible.
        if i is None or len(halo_spots) <= 1:
            # NOTE(review): this `if` has no body here -- presumably a `continue`
            # (nothing to merge) was lost; confirm against upstream.

        scope = Scope([e.expr for e in FindNodes(Expression).visit(i)])

        # The first HaloSpot is the merge target for all the following ones
        hs0 = halo_spots[0]
        mapper[hs0] = hs0.halo_scheme

        for hs in halo_spots[1:]:
            mapper[hs] = hs.halo_scheme

            for f, (loc_indices, _) in hs.fmapper.items():
                test = True
                for dep in scope.d_flow.project(f):
                    # NOTE(review): the `if` below has no body here, so as shown
                    # `test = False` runs for every `dep`; presumably a skip
                    # statement was lost.
                    if any(rule(dep, hs, loc_indices) for rule in merge_rules):
                    test = False
                if test:
                    # NOTE(review): the `try:` matching the `except ValueError:`
                    # below and the arguments of `HaloScheme.union(` are not
                    # visible; the handler's body is missing as well.
                        mapper[hs0] = HaloScheme.union(
                        mapper[hs] = mapper[hs].drop(f)
                    except ValueError:
                        # `hs.loc_indices=<frozendict {t: t1}` and
                        # `hs0.loc_indices=<frozendict {t: t0}`

    # Post-process analysis
    # A HaloSpot left with a void halo scheme is replaced by its own body
    mapper = {
        i: i.body if hs.is_void else i._rebuild(halo_scheme=hs)
        for i, hs in mapper.items()
    # NOTE(review): the closing brace of the comprehension above is not visible.

    # Transform the IET merging/dropping HaloSpots as according to the analysis
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet