Пример #1
0
def st_make_halo(stree):
    """
    Add :class:`NodeHalo`s to a :class:`ScheduleTree`. A NodeHalo captures
    the halo exchanges that should take place before executing the sub-tree;
    these are described by means of a :class:`HaloScheme`.

    Parameters
    ----------
    stree
        The tree to process; it is searched for nodes flagged ``is_Exprs``.

    Returns
    -------
    The same ``stree`` object received in input, after the insertions.

    Raises
    ------
    RuntimeError
        If building a HaloScheme fails with a HaloSchemeException while
        ``configuration['mpi']`` is truthy. Otherwise the failure is ignored
        and no HaloScheme is recorded for the offending node.
    """
    # Build a HaloScheme for each expression bundle
    halo_schemes = {}
    for n in findall(stree, lambda i: i.is_Exprs):
        try:
            halo_schemes[n] = HaloScheme(n.exprs, n.ispace, n.dspace)
        except HaloSchemeException as e:
            if configuration['mpi']:
                # Chain explicitly so that the original failure is reported
                # as the direct cause in the traceback
                raise RuntimeError(str(e)) from e

    # Insert the HaloScheme at a suitable level in the ScheduleTree
    mapper = {}
    for k, hs in halo_schemes.items():
        # The Iteration ancestors depend on `k` only, so they are computed
        # once per expression bundle rather than once per Function
        ancestors = [n for n in k.ancestors if n.is_Iteration]
        for f, v in hs.fmapper.items():
            # Default insertion point: right before the expression bundle
            spot = k
            for n in ancestors:
                # The first Iteration ancestor whose Dimension either is one
                # of the halo Dimensions or is not the root of any of the
                # `loc_indices` becomes the insertion point
                test0 = any(n.dim is i.dim for i in v.halos)
                test1 = n.dim not in [i.root for i in v.loc_indices]
                if test0 or test1:
                    spot = n
                    break
            mapper.setdefault(spot, []).append((f, v))

    # One NodeHalo per insertion point, carrying all entries collected for it
    for spot, entries in mapper.items():
        insert(NodeHalo(HaloScheme(fmapper=dict(entries))), spot.parent,
               [spot])

    return stree
Пример #2
0
def _hoist_halospots(iet):
    """
    Move HaloSpots from inner Iterations to outer ones, wherever doing so
    honors all data dependencies.

    Returns the transformed ``iet``.
    """
    # Build every Scope upfront, so that none is recomputed during the analysis
    scopes = {}
    for key, nodes in MapNodes().visit(iet).items():
        scopes[key] = Scope([e.expr for e in nodes])

    # Analysis: `leftover` tracks what remains in each HaloSpot, `hoisted`
    # collects the HaloSchemes to be placed in front of each Iteration
    leftover = {}
    hoisted = defaultdict(list)
    groups = MapNodes(Iteration, HaloSpot, 'groupby').visit(iet)
    for iters, halo_spots in groups.items():
        for hs in halo_spots:
            leftover[hs] = hs.halo_scheme

            for f in hs.fmapper:
                # Visit the Iterations in the order given by `iters`, and
                # stop at the first one the exchange can be hoisted to
                for n, outer in enumerate(iters):
                    maybe_hoistable = set()
                    for inner in iters[n:]:
                        maybe_hoistable.update(inner.dim._defines)

                    d_flow = scopes[outer].d_flow.project(f)

                    # A flow dependence is a stopper if it is caused by one
                    # of the candidate Dimensions, unless the write is an
                    # increment
                    stopped = any((dep.cause & maybe_hoistable)
                                  and not dep.write.is_increment
                                  for dep in d_flow)
                    if stopped:
                        continue

                    leftover[hs] = leftover[hs].drop(f)
                    hoisted[outer].append(hs.halo_scheme.project(f))
                    break

    # Post-process analysis
    mapper = {}
    for outer, hss in hoisted.items():
        mapper[outer] = HaloSpot(HaloScheme.union(hss), outer._rebuild())
    for hs, scheme in leftover.items():
        if scheme.is_void:
            # Nothing left to exchange here: keep the body only
            mapper[hs] = hs.body
        else:
            mapper[hs] = hs._rebuild(halo_scheme=scheme)

    # Transform the IET hoisting/dropping HaloSpots as according to the analysis
    iet = Transformer(mapper, nested=True).visit(iet)

    # Clean up: a HaloSpot whose body is itself a HaloSpot gets fused with it
    mapper = {}
    for hs in FindNodes(HaloSpot).visit(iet):
        nested = hs.body
        if not nested.is_HaloSpot:
            continue
        fused = HaloScheme.union([hs.halo_scheme, nested.halo_scheme])
        mapper[hs] = hs._rebuild(halo_scheme=fused, body=nested.body)
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet
Пример #3
0
def stree_make_halo(stree):
    """
    Add NodeHalos to a ScheduleTree. A NodeHalo captures the halo exchanges
    that should take place before executing the sub-tree; these are described
    by means of a HaloScheme.

    Parameters
    ----------
    stree
        The tree to process; it is searched for nodes flagged ``is_Exprs``.

    Returns
    -------
    The same ``stree`` object received in input, after the insertions.

    Raises
    ------
    RuntimeError
        If building a HaloScheme fails with a HaloSchemeException while
        ``configuration['mpi']`` is truthy. Otherwise the failure is ignored
        and no HaloScheme is recorded for the offending node.
    """
    # Build a HaloScheme for each expression bundle
    halo_schemes = {}
    for n in findall(stree, lambda i: i.is_Exprs):
        try:
            halo_schemes[n] = HaloScheme(n.exprs, n.ispace)
        except HaloSchemeException as e:
            if configuration['mpi']:
                # Chain explicitly so that the original failure is reported
                # as the direct cause in the traceback
                raise RuntimeError(str(e)) from e

    # Split a HaloScheme based on where it should be inserted
    # For example, it's possible that, for a given HaloScheme, a Function's
    # halo needs to be exchanged at a certain `stree` depth, while another
    # Function's halo needs to be exchanged before some other nodes
    mapper = {}
    for k, hs in halo_schemes.items():
        # The Iteration ancestors depend on `k` only, so they are computed
        # once per expression bundle rather than once per Function
        ancestors = [n for n in k.ancestors if n.is_Iteration]
        for f, v in hs.fmapper.items():
            # Default insertion point: right before the expression bundle
            spot = k
            for n in ancestors:
                # Place the halo exchange right before the first
                # distributed Dimension which requires it
                if any(i.dim in n.dim._defines for i in v.halos):
                    spot = n
                    break
            mapper.setdefault(spot, []).append(hs.project(f))

    # Now fuse the HaloSchemes at the same `stree` depth and perform the insertion
    # (the loop variable is deliberately not called `halo_schemes`, to avoid
    # shadowing the dict built above)
    for spot, projected in mapper.items():
        insert(NodeHalo(HaloScheme.union(projected)), spot.parent, [spot])

    return stree
Пример #4
0
def _merge_halospots(iet):
    """
    Fuse HaloSpots sitting at the same level of an Iteration tree, wherever
    doing so honors all data dependencies.

    Returns the transformed ``iet``.
    """

    def is_stopper(dep, hs):
        # A dependence not caused by any of the HaloSpot Dimensions never
        # prevents merging
        if not (dep.cause & set(hs.dimensions)):
            return False
        # Neither does a regular dependence for which `touched_halo` reports
        # nothing along the root of any of the causing Dimensions
        if dep.is_regular and not any(any(dep.read.touched_halo(c.root))
                                      for c in dep.cause):
            return False
        return True

    # Analysis
    mapper = {}
    immediate = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet)
    for i, halo_spots in immediate.items():
        if i is None or len(halo_spots) < 2:
            continue

        exprs = [e.expr for e in FindNodes(Expression).visit(i)]
        scope = Scope(exprs)

        # The first HaloSpot is the one absorbing the others
        hs0 = halo_spots[0]
        mapper[hs0] = hs0.halo_scheme

        for hs in halo_spots[1:]:
            mapper[hs] = hs.halo_scheme

            for f in hs.fmapper:
                deps = scope.d_flow.project(f)
                if any(is_stopper(dep, hs) for dep in deps):
                    continue

                # Move the exchange for `f` from `hs` into `hs0`
                projected = hs.halo_scheme.project(f)
                mapper[hs0] = HaloScheme.union([mapper[hs0], projected])
                mapper[hs] = mapper[hs].drop(f)

    # Post-process analysis
    replacements = {}
    for hs, scheme in mapper.items():
        if scheme.is_void:
            # Nothing left to exchange here: keep the body only
            replacements[hs] = hs.body
        else:
            replacements[hs] = hs._rebuild(halo_scheme=scheme)

    # Transform the IET merging/dropping HaloSpots as according to the analysis
    iet = Transformer(replacements, nested=True).visit(iet)

    return iet
Пример #5
0
def st_make_halo(stree):
    """
    Add :class:`NodeHalo` to a :class:`ScheduleTree`. A halo node describes
    what halo exchanges should take place before executing the sub-tree.

    Parameters
    ----------
    stree
        The tree to process. It is returned untouched when
        ``configuration['mpi']`` is falsy.

    Returns
    -------
    The same ``stree`` object received in input, after the insertions.

    Raises
    ------
    RuntimeError
        If building a HaloScheme raises a RuntimeError that is not handled
        as a HaloSchemeException, and ``configuration['mpi'] is True``.
    """
    if not configuration['mpi']:
        # TODO: This will be dropped as soon as stronger analysis will have
        # been implemented
        return stree

    # Maps each Iteration node to the NodeHalo to be placed in front of it.
    # The `stop` callback refers to nodes whose parent has already been
    # processed -- presumably the traversal does not descend below them
    processed = {}
    for n in LevelOrderIter(stree, stop=lambda i: i.parent in processed):
        if not n.is_Iteration:
            continue
        exprs = flatten(i.exprs for i in findall(n, lambda i: i.is_Exprs))
        try:
            halo_scheme = HaloScheme(exprs)
            if n.dim in halo_scheme.dmapper:
                processed[n] = NodeHalo(halo_scheme)
        except HaloSchemeException:
            # We should get here only when trying to compute a halo
            # scheme for a group of expressions that belong to different
            # iteration spaces. We expect proper halo schemes to be built
            # as the `stree` visit proceeds.
            # TODO: However, at the end, we should check that a halo scheme,
            # possibly even a "void" one, has been built for *all* of the
            # expressions, and error out otherwise.
            continue
        except RuntimeError as e:
            # NOTE(review): this is an identity check against `True`; with a
            # truthy non-bool value the error would be silently dropped --
            # confirm this is intended
            if configuration['mpi'] is True:
                # Chain explicitly so that the original failure is reported
                # as the direct cause in the traceback
                raise RuntimeError(str(e)) from e

    for k, v in processed.items():
        insert(v, k.parent, [k])

    return stree
Пример #6
0
    def _optimize_halospots(self, iet):
        """
        Optimize the HaloSpots in ``iet``.

        * Remove all ``useless`` HaloSpots;
        * Merge all ``hoistable`` HaloSpots with their root HaloSpot, thus
          removing redundant communications and anticipating communications
          that will be required by later Iterations.

        The optimization is carried out in four consecutive stages, each of
        which builds a mapper and applies it through a Transformer before the
        next one starts.

        Returns a 2-tuple: the transformed ``iet`` and an empty dict.
        """
        # Drop `useless` HaloSpots
        mapper = {
            hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(hs.useless))
            for hs in FindNodes(HaloSpot).visit(iet)
        }
        iet = Transformer(mapper, nested=True).visit(iet)

        # Handle `hoistable` HaloSpots
        # First, we merge `hoistable` HaloSpots together, to anticipate communications
        mapper = {}
        for tree in retrieve_iteration_tree(iet):
            halo_spots = FindNodes(HaloSpot).visit(tree.root)
            if not halo_spots:
                continue
            # The first HaloSpot found is the one absorbing the `hoistable`
            # part of all the others
            root = halo_spots[0]
            if root in mapper:
                # Already scheduled for replacement while handling a previous tree
                continue
            hss = [root.halo_scheme]
            hss.extend([
                hs.halo_scheme.project(hs.hoistable) for hs in halo_spots[1:]
            ])
            try:
                mapper[root] = root._rebuild(halo_scheme=HaloScheme.union(hss))
            except ValueError:
                # HaloSpots have non-matching `loc_indices` and therefore can't be merged
                warning("Found hoistable HaloSpots with disjoint loc_indices, "
                        "skipping optimization")
                continue
            # What has been merged into `root` is removed from the other
            # HaloSpots; those left with nothing to exchange are replaced by
            # their body
            for hs in halo_spots[1:]:
                halo_scheme = hs.halo_scheme.drop(hs.hoistable)
                if halo_scheme.is_void:
                    mapper[hs] = hs.body
                else:
                    mapper[hs] = hs._rebuild(halo_scheme=halo_scheme)
        iet = Transformer(mapper, nested=True).visit(iet)

        # Then, we make sure the halo exchanges get performed *before*
        # the first distributed Dimension. Again, we do this to anticipate
        # communications, which hopefully has a pay off in performance
        #
        # <Iteration x>                    <HaloSpot(u)>, in y
        #   <HaloSpot(u)>, in y    ---->   <Iteration x>
        #   <Iteration y>                    <Iteration y>
        mapper = {}
        for i, halo_spots in MapNodes(Iteration, HaloSpot).visit(iet).items():
            hoistable = [hs for hs in halo_spots if hs.hoistable]
            if not hoistable:
                continue
            elif len(hoistable) > 1:
                # We should never end up here, but for now we can't prove it formally
                warning(
                    "Found multiple hoistable HaloSpots, skipping optimization"
                )
                continue
            # Exactly one candidate is left at this point
            hs = hoistable.pop()
            if hs in mapper:
                continue
            if i.dim.root in hs.dimensions:
                # The non-hoistable part stays where it is (or the HaloSpot
                # is replaced by its body, if nothing is left) ...
                halo_scheme = hs.halo_scheme.drop(hs.hoistable)
                if halo_scheme.is_void:
                    mapper[hs] = hs.body
                else:
                    mapper[hs] = hs._rebuild(halo_scheme=halo_scheme)

                # ... while the hoistable part gets wrapped around a rebuilt
                # copy of the Iteration `i`
                halo_scheme = hs.halo_scheme.project(hs.hoistable)
                mapper[i] = hs._rebuild(halo_scheme=halo_scheme,
                                        body=i._rebuild())
        iet = Transformer(mapper, nested=True).visit(iet)

        # Finally, we try to move HaloSpot-free Iteration nests within HaloSpot
        # subtrees, to overlap as much computation as possible. The HaloSpot-free
        # Iteration nests must be fully affine, otherwise we wouldn't be able to
        # honour the data dependences along the halo
        #
        # <HaloSpot(u,v)>            HaloSpot(u,v)
        #   <A>             ---->      <A>
        # <B>              affine?     <B>
        #
        # Here, <B> doesn't require any halo exchange, but it might still need the
        # output of <A>; thus, if we do computation/communication overlap over <A>
        # *and* want to embed <B> within the HaloSpot, then <B>'s iteration space
        # will have to be split as well. For this, <B> must be affine.
        mapper = {}
        for v in FindAdjacent((HaloSpot, Iteration)).visit(iet).values():
            for g in v:
                # `root` is the HaloSpot currently absorbing the nodes that
                # follow it within the group `g`, if any
                root = None
                for i in g:
                    if i.is_HaloSpot:
                        root = i
                        mapper[root] = [root.body]
                    elif root and all(j.is_Affine
                                      for j in FindNodes(Iteration).visit(i)):
                        # `i` goes into the body of `root`; it is mapped to
                        # None at its original position (presumably this
                        # makes the Transformer drop it -- TODO confirm)
                        mapper[root].append(i)
                        mapper[i] = None
                    else:
                        # A non-affine node interrupts the sequence: the
                        # nodes after it aren't absorbed by this `root`
                        root = None
        # HaloSpots get rebuilt around the collected bodies; None entries are
        # left as they are
        mapper = {
            k: k._rebuild(body=List(body=v)) if v else v
            for k, v in mapper.items()
        }
        iet = Transformer(mapper).visit(iet)

        return iet, {}
Пример #7
0
def _hoist_halospots(iet):
    """
    Move HaloSpots from inner Iterations to outer ones, wherever doing so
    honors all data dependencies.

    Returns the transformed ``iet``.
    """

    # Hoisting rules -- a rule returning True means that the input `dep` does
    # not prevent halo hoisting

    def rule0(dep, candidates, loc_dims):
        # E.g., `dep=W<f,[x]> -> R<f,[x-1]>` and `candidates=({time}, {x})` => False
        # E.g., `dep=W<f,[t1, x, y]> -> R<f,[t0, x-1, y+1]>`, `dep.cause={t,time}` and
        #       `candidates=({x},)` => True
        if not all(c & set(dep.distance_mapper) for c in candidates):
            return False
        if any(c & dep.cause for c in candidates):
            return False
        return not any(c & loc_dims for c in candidates)

    def rule1(dep, candidates, loc_dims):
        # An increment isn't a stopper to hoisting
        return dep.write.is_increment

    hoist_rules = [rule0, rule1]

    # Build every Scope upfront, so that none is recomputed during the analysis
    scopes = {}
    for key, nodes in MapNodes().visit(iet).items():
        scopes[key] = Scope([e.expr for e in nodes])

    # Analysis: `leftover` tracks what remains in each HaloSpot, `hoisted`
    # collects the HaloSchemes to be placed in front of each Iteration
    leftover = {}
    hoisted = defaultdict(list)
    groups = MapNodes(Iteration, HaloSpot, 'groupby').visit(iet)
    for iters, halo_spots in groups.items():
        for hs in halo_spots:
            leftover[hs] = hs.halo_scheme

            for f, (loc_indices, _) in hs.fmapper.items():
                loc_dims = frozenset(q for d in loc_indices
                                     for q in d._defines)

                # Visit the Iterations in the order given by `iters`, and
                # stop at the first one the exchange can be hoisted to
                for n, outer in enumerate(iters):
                    candidates = [inner.dim._defines for inner in iters[n:]]

                    # Hoisting is legal only if every flow dependence is
                    # cleared by at least one rule
                    deps = scopes[outer].d_flow.project(f)
                    legal = all(any(rule(dep, candidates, loc_dims)
                                    for rule in hoist_rules)
                                for dep in deps)
                    if not legal:
                        continue

                    leftover[hs] = leftover[hs].drop(f)
                    hoisted[outer].append(hs.halo_scheme.project(f))
                    break

    # Post-process analysis
    mapper = {}
    for outer, hss in hoisted.items():
        mapper[outer] = HaloSpot(HaloScheme.union(hss), outer._rebuild())
    for hs, scheme in leftover.items():
        if scheme.is_void:
            # Nothing left to exchange here: keep the body only
            mapper[hs] = hs.body
        else:
            mapper[hs] = hs._rebuild(halo_scheme=scheme)

    # Transform the IET hoisting/dropping HaloSpots as according to the analysis
    iet = Transformer(mapper, nested=True).visit(iet)

    # Clean up: a HaloSpot whose body is itself a HaloSpot gets fused with it
    mapper = {}
    for hs in FindNodes(HaloSpot).visit(iet):
        nested = hs.body
        if not nested.is_HaloSpot:
            continue
        fused = HaloScheme.union([hs.halo_scheme, nested.halo_scheme])
        mapper[hs] = hs._rebuild(halo_scheme=fused, body=nested.body)
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet
Пример #8
0
def _merge_halospots(iet):
    """
    Fuse HaloSpots sitting at the same level of an Iteration tree, wherever
    doing so honors all data dependencies.

    Returns the transformed ``iet``.
    """

    # Merge rules -- a rule returning True means that the input `dep` does
    # not prevent halo merging

    def rule0(dep, hs, loc_indices):
        # E.g., `dep=W<f,[t1, x]> -> R<f,[t0, x-1]>` => True
        return all(
            d not in hs.dimensions
            and dep.distance_mapper[d] is not S.Infinity
            for d in dep.cause)

    def rule1(dep, hs, loc_indices):
        # TODO This is apparently never hit, but feeling uncomfortable to remove it
        if not dep.is_regular:
            return False
        return not any(any(dep.read.touched_halo(d.root)) for d in dep.cause)

    def rule2(dep, hs, loc_indices):
        # E.g., `dep=W<f,[t1, x+1]> -> R<f,[t1, xl+1]>` and `loc_indices={t: t0}` => True
        for d, v in loc_indices.items():
            if dep.distance_mapper[d] == 0 and dep.source[d] is not v:
                return True
        return False

    merge_rules = [rule0, rule1, rule2]

    # Analysis
    mapper = {}
    immediate = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet)
    for i, halo_spots in immediate.items():
        if i is None or len(halo_spots) < 2:
            continue

        exprs = [e.expr for e in FindNodes(Expression).visit(i)]
        scope = Scope(exprs)

        # The first HaloSpot is the one absorbing the others
        hs0 = halo_spots[0]
        mapper[hs0] = hs0.halo_scheme

        for hs in halo_spots[1:]:
            mapper[hs] = hs.halo_scheme

            for f, (loc_indices, _) in hs.fmapper.items():
                # Merging is legal only if every flow dependence is cleared
                # by at least one rule
                deps = scope.d_flow.project(f)
                legal = all(any(rule(dep, hs, loc_indices)
                                for rule in merge_rules)
                            for dep in deps)
                if not legal:
                    continue

                try:
                    mapper[hs0] = HaloScheme.union(
                        [mapper[hs0], hs.halo_scheme.project(f)])
                    mapper[hs] = mapper[hs].drop(f)
                except ValueError:
                    # `hs.loc_indices=<frozendict {t: t1}` and
                    # `hs0.loc_indices=<frozendict {t: t0}`
                    pass

    # Post-process analysis
    replacements = {}
    for hs, scheme in mapper.items():
        if scheme.is_void:
            # Nothing left to exchange here: keep the body only
            replacements[hs] = hs.body
        else:
            replacements[hs] = hs._rebuild(halo_scheme=scheme)

    # Transform the IET merging/dropping HaloSpots as according to the analysis
    iet = Transformer(replacements, nested=True).visit(iet)

    return iet