Example #1
    def _fetch_scope(self, clusters):
        key = as_tuple(clusters)
        if key not in self.state.scopes:
            self.state.scopes[key] = Scope(flatten(c.exprs for c in key))
        return self.state.scopes[key]
def groupby(clusters):
    """
    Attempt grouping :class:`PartialCluster`s together to create bigger
    :class:`PartialCluster`s (i.e., containing more expressions).

    .. note::

        This function relies on advanced data dependency analysis tools
        based upon classic Lamport theory.
    """
    clusters = clusters.unfreeze()

    processed = ClusterGroup()
    for c in clusters:
        if c.guards:
            # Guarded clusters cannot be grouped together
            processed.append(c)
            continue
        fused = False
        for candidate in reversed(list(processed)):
            # Collect all relevant data dependences
            scope = Scope(exprs=candidate.exprs + c.exprs)

            # Collect anti-dependences preventing grouping
            anti = scope.d_anti.carried() - scope.d_anti.increment
            funcs = [i.function for i in anti]

            # Collect flow-dependences breaking the search
            flow = scope.d_flow - (scope.d_flow.inplace() +
                                   scope.d_flow.increment)
            flow = {i.cause for i in flow}

            if candidate.ispace.is_compatible(c.ispace) and\
                    all(is_local(i, candidate, c, clusters) for i in funcs):
                # /c/ will be fused into /candidate/. All fusion-induced anti
                # dependences are eliminated through so-called "index bumping and
                # array contraction", which transforms array accesses into scalars

                # Optimization: we also bump-and-contract the Arrays inducing
                # non-carried dependences, to avoid useless memory accesses
                funcs += [
                    i.function for i in scope.d_flow.independent()
                    if is_local(i.function, candidate, c, clusters)
                ]

                bump_and_contract(funcs, candidate, c)
                candidate.squash(c)
                fused = True
                break
            elif anti:
                # Data dependences prevent fusion with earlier clusters, so
                # must break up the search
                c.atomics.update(set(anti.cause))
                break
            elif set(flow).intersection(candidate.atomics):
                # We cannot even attempt fusing with earlier clusters, as
                # otherwise the existing flow dependences wouldn't be honored
                break
        # Fallback
        if not fused:
            processed.append(c)

    return processed
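
The fusion test above hinges on classifying the data dependences between `candidate` and `c`: carried anti-dependences block fusion, while carried flow-dependences stop the backwards search. Below is a minimal, self-contained sketch of that classification for 1-D array accesses; `Access` and `classify_dep` are hypothetical names for illustration only, not part of Devito's `Scope` API.

from collections import namedtuple

# An access to a 1-D array: array name, index offset relative to the loop
# variable (0 for a[i], -1 for a[i-1]), and whether it is a read or a write
Access = namedtuple('Access', 'name offset mode')

def classify_dep(first, second):
    """Classify the dependence from `first` to `second` (program order)."""
    if first.name != second.name or 'W' not in (first.mode, second.mode):
        return None  # read-after-read is not a dependence
    if first.mode == 'W' and second.mode == 'R':
        kind = 'flow'    # write, then read
    elif first.mode == 'R' and second.mode == 'W':
        kind = 'anti'    # read, then write
    else:
        kind = 'output'  # write, then write
    carried = first.offset != second.offset  # nonzero distance => loop-carried
    return kind, carried

# `a[i] = ...` followed by `... = a[i-1]`: loop-carried flow dependence
print(classify_dep(Access('a', 0, 'W'), Access('a', -1, 'R')))  # ('flow', True)
# `... = a[i]` followed by `a[i] = ...`: non-carried anti dependence
print(classify_dep(Access('a', 0, 'R'), Access('a', 0, 'W')))   # ('anti', False)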
Example #3
    def _build_dag(self, cgroups, prefix):
        """
        A DAG captures data dependences between ClusterGroups up to the iteration
        space depth dictated by ``prefix``.

        Examples
        --------
        Consider two ClusterGroups `c0` and `c1`, and ``prefix=[i]``.

        1) cg0 := b[i, j] = ...
           cg1 := ... = ... b[i, j] ...
           Non-carried flow-dependence, so `cg1` must go after `cg0`.

        2) cg0 := b[i, j] = ...
           cg1 := ... = ... b[i, j-1] ...
           Carried flow-dependence in `j`, so `cg1` must go after `cg0`.

        3) cg0 := b[i, j] = ...
           cg1 := ... = ... b[i, j+1] ...
           Carried anti-dependence in `j`, so `cg1` must go after `cg0`.

        4) cg0 := b[i, j] = ...
           cg1 := ... = ... b[i-1, j+1] ...
           Carried flow-dependence in `i`, so `cg1` can safely go before or after
           `cg0`. Note: the `j+1` in `cg1` has no impact -- the actual dependence
           between `b[i, j]` and `b[i-1, j+1]` is along `i`.
        """
        prefix = {i.dim for i in as_tuple(prefix)}

        dag = DAG(nodes=cgroups)
        for n, cg0 in enumerate(cgroups):
            for cg1 in cgroups[n + 1:]:
                scope = Scope(exprs=cg0.exprs + cg1.exprs)

                # Handle anti-dependences
                deps = scope.d_anti - (cg0.scope.d_anti + cg1.scope.d_anti)
                if any(i.cause & prefix for i in deps):
                    # Anti-dependences break the execution flow
                    # i) ClusterGroups between `cg0` and `cg1` must precede `cg1`
                    for cg2 in cgroups[n:cgroups.index(cg1)]:
                        dag.add_edge(cg2, cg1)
                    # ii) ClusterGroups after `cg1` cannot precede `cg1`
                    for cg2 in cgroups[cgroups.index(cg1) + 1:]:
                        dag.add_edge(cg1, cg2)
                    break
                elif deps:
                    dag.add_edge(cg0, cg1)

                # Flow-dependences along one of the `prefix` Dimensions can
                # be ignored; all others require sequentialization
                deps = scope.d_flow - (cg0.scope.d_flow + cg1.scope.d_flow)
                if any(not (i.cause and i.cause & prefix) for i in deps):
                    dag.add_edge(cg0, cg1)
                    continue

                # Handle increment-after-write dependences
                deps = scope.d_output - (cg0.scope.d_output +
                                         cg1.scope.d_output)
                if any(i.is_iaw for i in deps):
                    dag.add_edge(cg0, cg1)
                    continue

        return dag
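
`_build_dag` only records ordering constraints; a topological sort of the resulting DAG then yields a legal schedule of the ClusterGroups. Below is a self-contained sketch of that step using Kahn's algorithm over a toy adjacency mapping, not Devito's DAG class.

from collections import deque

def toposort(nodes, edges):
    """Kahn's algorithm; `edges` maps a node to the set of its successors."""
    indegree = {n: 0 for n in nodes}
    for succs in edges.values():
        for s in succs:
            indegree[s] += 1
    queue = deque(n for n in nodes if indegree[n] == 0)
    order = []
    while queue:
        n = queue.popleft()
        order.append(n)
        for s in edges.get(n, ()):
            indegree[s] -= 1
            if indegree[s] == 0:
                queue.append(s)
    return order

# `cg0 -> cg1` encodes "cg1 must go after cg0", as in cases 1)-3) above
print(toposort(['cg0', 'cg1', 'cg2'], {'cg0': {'cg1'}, 'cg1': {'cg2'}}))
# ['cg0', 'cg1', 'cg2']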
Example #4
    def scope(self):
        return Scope(exprs=self.exprs)
Example #5
def _hoist_halospots(iet):
    """
    Hoist HaloSpots from inner to outer Iterations where all data dependencies
    would be honored.
    """

    # Hoisting rules -- if the retval is True, then it means the input `dep` is not
    # a stopper to halo hoisting

    def rule0(dep, candidates):
        # E.g., `dep=W<f,[x]> -> R<f,[x-1]>` and `candidates=(time, x)` => False
        # E.g., `dep=W<f,[t1, x, y]> -> R<f,[t0, x-1, y+1]>`, `dep.cause={t,time}` and
        #       `candidates=(x,)` => True
        return (all(d in dep.distance_mapper for d in candidates)
                and not dep.cause & candidates)

    def rule1(dep, candidates):
        # An increment isn't a stopper to hoisting
        return dep.write.is_increment

    hoist_rules = [rule0, rule1]

    # Precompute scopes to save time
    scopes = {
        i: Scope([e.expr for e in v])
        for i, v in MapNodes().visit(iet).items()
    }

    # Analysis
    hsmapper = {}
    imapper = defaultdict(list)
    for iters, halo_spots in MapNodes(Iteration, HaloSpot,
                                      'groupby').visit(iet).items():
        for hs in halo_spots:
            hsmapper[hs] = hs.halo_scheme

            for f in hs.fmapper:
                for n, i in enumerate(iters):
                    candidates = set().union(
                        *[i.dim._defines for i in iters[n:]])

                    test = True
                    for dep in scopes[i].d_flow.project(f):
                        if any(rule(dep, candidates) for rule in hoist_rules):
                            continue
                        test = False
                        break
                    if test:
                        hsmapper[hs] = hsmapper[hs].drop(f)
                        imapper[i].append(hs.halo_scheme.project(f))
                        break

    # Post-process analysis
    mapper = {
        i: HaloSpot(HaloScheme.union(hss), i._rebuild())
        for i, hss in imapper.items()
    }
    mapper.update({
        i: i.body if hs.is_void else i._rebuild(halo_scheme=hs)
        for i, hs in hsmapper.items()
    })

    # Transform the IET, hoisting/dropping HaloSpots according to the analysis
    iet = Transformer(mapper, nested=True).visit(iet)

    # Clean up: de-nest HaloSpots if necessary
    mapper = {}
    for hs in FindNodes(HaloSpot).visit(iet):
        if hs.body.is_HaloSpot:
            halo_scheme = HaloScheme.union(
                [hs.halo_scheme, hs.body.halo_scheme])
            mapper[hs] = hs._rebuild(halo_scheme=halo_scheme,
                                     body=hs.body.body)
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet
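
The analysis above follows a simple pattern: a HaloSpot can be hoisted past an Iteration only if every relevant flow dependence is cleared by at least one rule. A minimal, Devito-independent sketch of that pattern follows; all names are hypothetical, and the toy dependences are just `(cause, is_increment)` pairs.

def is_hoistable(deps, candidates, rules):
    """True iff every dependence is cleared by at least one rule."""
    return all(any(rule(dep, candidates) for rule in rules) for dep in deps)

# rule0 analogue (simplified): the dependence is not carried in any candidate Dimension
rule_not_carried = lambda dep, candidates: not (dep[0] & candidates)
# rule1 analogue: increments are never stoppers
rule_increment = lambda dep, candidates: dep[1]

deps = [({'x'}, False), (set(), True)]
print(is_hoistable(deps, {'y'}, [rule_not_carried, rule_increment]))  # True
print(is_hoistable(deps, {'x'}, [rule_not_carried, rule_increment]))  # False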
Example #6
    def callback(self, clusters, prefix, backlog=None, known_break=None):
        if not prefix:
            return clusters

        known_break = known_break or set()
        backlog = backlog or []

        # Take the innermost Dimension -- no Clusters other than those in
        # `clusters` are supposed to share it
        candidates = prefix[-1].dim._defines

        scope = Scope(exprs=flatten(c.exprs for c in clusters))

        # Handle the nastiest case -- ambiguity due to the presence of both a
        # flow- and an anti-dependence.
        #
        # Note: in most cases, `scope.d_anti.cause == {}` -- either because
        # `scope.d_anti == {}` or because the few anti dependences are not carried
        # in any Dimension. We exploit this observation so that `d_flow`, which
        # may be expensive, is only computed when strictly necessary
        maybe_break = scope.d_anti.cause & candidates
        if len(clusters) > 1 and maybe_break:
            require_break = scope.d_flow.cause & maybe_break
            if require_break:
                backlog = [clusters[-1]] + backlog
                # Try with increasingly smaller ClusterGroups until the ambiguity is gone
                return self.callback(clusters[:-1], prefix, backlog,
                                     require_break)

        # Schedule Clusters over different IterationSpaces if this increases parallelism
        for i in range(1, len(clusters)):
            if self._break_for_parallelism(scope, candidates, i):
                return self.callback(clusters[:i], prefix,
                                     clusters[i:] + backlog,
                                     candidates | known_break)

        # Compute iteration direction
        idir = {
            d: Backward
            for d in candidates if d.root in scope.d_anti.cause
        }
        if maybe_break:
            idir.update({
                d: Forward
                for d in candidates if d.root in scope.d_flow.cause
            })
        idir.update({d: Forward for d in candidates if d not in idir})

        # Enforce iteration direction on each Cluster
        processed = []
        for c in clusters:
            ispace = IterationSpace(c.ispace.intervals, c.ispace.sub_iterators,
                                    {
                                        **c.ispace.directions,
                                        **idir
                                    })
            processed.append(c.rebuild(ispace=ispace))

        if not backlog:
            return processed

        # Handle the backlog -- the Clusters characterized by flow- and anti-dependences
        # along one or more Dimensions
        idir = {d: Any for d in known_break}
        for i, c in enumerate(list(backlog)):
            ispace = IterationSpace(c.ispace.intervals.lift(known_break),
                                    c.ispace.sub_iterators, {
                                        **c.ispace.directions,
                                        **idir
                                    })
            dspace = c.dspace.lift(known_break)
            backlog[i] = c.rebuild(ispace=ispace, dspace=dspace)

        return processed + self.callback(backlog, prefix)
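
The direction assignment above can be read in isolation: Dimensions carrying anti-dependences get Backward, flow-dependences then override with Forward, and everything left over defaults to Forward. A toy, self-contained restatement, with plain strings in place of Devito's Dimension and direction objects and ignoring the `maybe_break` guard:

def choose_directions(candidates, anti_cause, flow_cause):
    idir = {d: 'Backward' for d in candidates if d in anti_cause}
    idir.update({d: 'Forward' for d in candidates if d in flow_cause})
    idir.update({d: 'Forward' for d in candidates if d not in idir})
    return idir

print(choose_directions(['t', 'x'], anti_cause={'t'}, flow_cause=set()))
# {'t': 'Backward', 'x': 'Forward'}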
Example #7
def _merge_halospots(iet):
    """
    Merge HaloSpots on the same Iteration tree level where all data dependencies
    would be honored.
    """

    # Merge rules -- if the retval is True, then it means the input `dep` is not
    # a stopper to halo merging

    def rule0(dep, hs, loc_indices):
        # E.g., `dep=W<f,[t1, x]> -> R<f,[t0, x-1]>` => True
        return not any(
            d in hs.dimensions or dep.distance_mapper[d] is S.Infinity
            for d in dep.cause)

    def rule1(dep, hs, loc_indices):
        # TODO This is apparently never hit, but we feel uncomfortable removing it
        return dep.is_regular and all(not any(dep.read.touched_halo(d.root))
                                      for d in dep.cause)

    def rule2(dep, hs, loc_indices):
        # E.g., `dep=W<f,[t1, x+1]> -> R<f,[t1, xl+1]>` and `loc_indices={t: t0}` => True
        return any(dep.distance_mapper[d] == 0 and dep.source[d] is not v
                   for d, v in loc_indices.items())

    merge_rules = [rule0, rule1, rule2]

    # Analysis
    mapper = {}
    for i, halo_spots in MapNodes(Iteration, HaloSpot,
                                  'immediate').visit(iet).items():
        if i is None or len(halo_spots) <= 1:
            continue

        scope = Scope([e.expr for e in FindNodes(Expression).visit(i)])

        hs0 = halo_spots[0]
        mapper[hs0] = hs0.halo_scheme

        for hs in halo_spots[1:]:
            mapper[hs] = hs.halo_scheme

            for f, (loc_indices, _) in hs.fmapper.items():
                test = True
                for dep in scope.d_flow.project(f):
                    if any(rule(dep, hs, loc_indices) for rule in merge_rules):
                        continue
                    test = False
                    break
                if test:
                    try:
                        mapper[hs0] = HaloScheme.union(
                            [mapper[hs0],
                             hs.halo_scheme.project(f)])
                        mapper[hs] = mapper[hs].drop(f)
                    except ValueError:
                        # E.g., `hs.loc_indices=<frozendict {t: t1}>` and
                        # `hs0.loc_indices=<frozendict {t: t0}>`
                        pass

    # Post-process analysis
    mapper = {
        i: i.body if hs.is_void else i._rebuild(halo_scheme=hs)
        for i, hs in mapper.items()
    }

    # Transform the IET, merging/dropping HaloSpots according to the analysis
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet
Example #8
def groupby(clusters):
    """
    Group PartialClusters together to create "fatter" PartialClusters
    (i.e., containing more expressions).

    Notes
    -----
    This function relies on advanced data dependency analysis tools based upon
    classic Lamport theory.
    """
    clusters = clusters.unfreeze()

    processed = ClusterGroup()
    for c in clusters:
        fused = False
        for candidate in reversed(list(processed)):
            # Guarded clusters cannot be grouped together
            if c.guards:
                break

            # Collect all relevant data dependences
            scope = Scope(exprs=candidate.exprs + c.exprs)

            # Collect anti-dependences preventing grouping
            anti = scope.d_anti.carried() - scope.d_anti.increment
            funcs = set(anti.functions)

            # Collect flow-dependences breaking the search
            flow = scope.d_flow - (scope.d_flow.inplace() +
                                   scope.d_flow.increment)

            # Can we group `c` with `candidate`?
            test0 = not candidate.guards  # No intervening guards
            test1 = candidate.ispace.is_compatible(
                c.ispace)  # Compatible ispaces
            test2 = all(is_local(i, candidate, c, clusters)
                        for i in funcs)  # No antideps
            if test0 and test1 and test2:
                # Yes, `c` can be grouped with `candidate`. All anti-dependences
                # (if any) can be eliminated through "index bumping and array
                # contraction", which turns Array temporaries into Scalar temporaries

                # Optimization: we also bump-and-contract the Arrays inducing
                # non-carried dependences, to minimize the working set
                funcs.update({
                    i.function
                    for i in scope.d_flow.independent()
                    if is_local(i.function, candidate, c, clusters)
                })

                bump_and_contract(funcs, candidate, c)
                candidate.squash(c)
                fused = True
                break
            elif anti:
                # Data dependences prevent fusion with earlier Clusters, so
                # must break up the search
                c.atomics.update(anti.cause)
                break
            elif flow.cause & candidate.atomics:
                # We cannot even attempt fusing with earlier Clusters, as
                # otherwise the carried flow dependences wouldn't be honored
                break
            elif set(candidate.guards) & set(c.dimensions):
                # As above, we can't attempt fusion with earlier Clusters, this
                # time because there are intervening conditionals along one or
                # more of the shared iteration dimensions
                break
        # Fallback
        if not fused:
            processed.append(c)

    return processed
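
To make the "index bumping and array contraction" comment concrete, here is a plain-Python before/after of what contraction achieves once the producer and consumer loops are fused: the Array temporary is produced and consumed within the same iteration, so it degenerates to a scalar. This only illustrates the idea, not `bump_and_contract` itself.

import numpy as np

a = np.arange(8.0)
b = np.empty(8)

# Before fusion: two loops communicating through an Array temporary
tmp = np.empty(8)
for i in range(8):
    tmp[i] = 2.0 * a[i]
for i in range(8):
    b[i] = tmp[i] + 1.0

# After fusion + contraction: the temporary becomes a scalar
for i in range(8):
    tmp_s = 2.0 * a[i]
    b[i] = tmp_s + 1.0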
Example #9
def classify(exprs, ispace):
    """
    Produce the mapper ``Function -> HaloSchemeEntry``, which describes the
    necessary halo exchanges in the given Scope.
    """
    scope = Scope(exprs)

    mapper = {}
    for f, r in scope.reads.items():
        if not f.is_DiscreteFunction:
            continue
        elif not isinstance(f.grid, Grid):
            # TODO: improve me
            continue

        # For each data access, determine if (and what type of) a halo exchange
        # is required
        halo_labels = defaultdict(set)
        for i in r:
            v = {}
            for d in i.findices:
                if f.grid.is_distributed(d):
                    if i.affine(d):
                        thl, thr = i.touched_halo(d)
                        # Note: if the left-HALO is touched (i.e., `thl = True`), then
                        # the *right-HALO* is to be sent over in a halo exchange
                        v[(d, LEFT)] = (thr and STENCIL) or IDENTITY
                        v[(d, RIGHT)] = (thl and STENCIL) or IDENTITY
                    else:
                        v[(d, LEFT)] = STENCIL
                        v[(d, RIGHT)] = STENCIL
                else:
                    v[(d, i[d])] = NONE

            # Does `i` actually require a halo exchange?
            if not any(hl is STENCIL for hl in v.values()):
                continue

            # Derive diagonal halo exchanges from the previous analysis
            combs = list(product([LEFT, CENTER, RIGHT], repeat=len(f._dist_dimensions)))
            combs.remove((CENTER,)*len(f._dist_dimensions))
            for c in combs:
                key = (f._dist_dimensions, c)
                if all(v.get((d, s)) is STENCIL or s is CENTER for d, s in zip(*key)):
                    v[key] = STENCIL

            # Finally update the `halo_labels`
            for j, hl in v.items():
                halo_labels[j].add(hl)

        if not halo_labels:
            continue

        # Distinguish between Dimensions requiring a halo exchange and those which don't
        up_loc_indices, halos = defaultdict(list), []
        for (d, s), hl in halo_labels.items():
            try:
                hl.remove(IDENTITY)
            except KeyError:
                pass
            if not hl:
                continue
            elif len(hl) > 1:
                raise HaloSchemeException("Inconsistency found while building a halo "
                                          "scheme for `%s` along Dimension `%s`" % (f, d))
            elif hl.pop() is STENCIL:
                halos.append(Halo(d, s))
            else:
                up_loc_indices[d].append(s)

        # Process the loc_indices. Consider:
        # 1) u[t+1, x] = f(u[t, x])   => shift == 1
        # 2) u[t-1, x] = f(u[t, x])   => shift == 1
        # 3) u[t+1, x] = f(u[t+1, x]) => shift == 0
        # In the first and second cases, the x-halo should be inserted at `t`,
        # while in the last case it should be inserted at `t+1`.
        loc_indices = {}
        for d, aindices in up_loc_indices.items():
            try:
                func = Max if ispace.is_forward(d.root) else Min
            except KeyError:
                # Max or Min is the same since `d` isn't an `ispace` Dimension
                func = Max
            candidates = [i for i in aindices if not is_integer(i)]
            candidates = {(i.origin if d.is_Stepping else i) - d: i for i in candidates}
            try:
                loc_indices[d] = candidates[func(*candidates.keys())]
            except KeyError:
                # E.g., `aindices = [0, 1, d+1]` -- it doesn't really matter
                # what we put here, so we place 0 as it's the old behaviour
                loc_indices[d] = 0

        mapper[f] = HaloSchemeEntry(frozendict(loc_indices), frozenset(halos))

    return mapper
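
The comment inside the loop above notes a left/right swap: if an access reads into the left halo, it is the data near the opposite (right) end of the neighbour's owned region that must be shipped over. A toy sketch of that bookkeeping, with `halo_sides_touched` and `sides_to_send` as hypothetical stand-ins for `touched_halo` and the LEFT/RIGHT labels:

def halo_sides_touched(min_offset, max_offset):
    """Which halo sides a stencil with the given index offsets reads into."""
    return (min_offset < 0,   # touches the left halo
            max_offset > 0)   # touches the right halo

def sides_to_send(min_offset, max_offset):
    thl, thr = halo_sides_touched(min_offset, max_offset)
    sends = []
    if thl:
        sends.append('LEFT neighbour sends its RIGHT-most owned points')
    if thr:
        sends.append('RIGHT neighbour sends its LEFT-most owned points')
    return sends

print(sides_to_send(-1, 1))  # a centered stencil: u[x-1], u[x], u[x+1]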
Example #10
    def callback(self, clusters, prefix, backlog=None, known_break=None):
        if not prefix:
            return clusters

        known_break = known_break or set()
        backlog = backlog or []

        # Take the innermost Dimension -- no Clusters other than those in
        # `clusters` are supposed to share it
        candidates = prefix[-1].dim._defines

        scope = Scope(exprs=flatten(c.exprs for c in clusters))

        # The nastiest case:
        # eq0 := u[t+1, x] = ... u[t, x]
        # eq1 := v[t+1, x] = ... v[t, x] ... u[t, x] ... u[t+1, x] ... u[t+2, x]
        # Here, `eq0` marches forward along `t`, while `eq1` has both a flow and an
        # anti dependence with `eq0`, which ultimately will require `eq1` to go in
        # a separate t-loop
        require_break = (scope.d_flow.cause & scope.d_anti.cause) & candidates
        if require_break and len(clusters) > 1:
            backlog = [clusters[-1]] + backlog
            # Try with increasingly smaller Cluster groups until the ambiguity is solved
            return self.callback(clusters[:-1], prefix, backlog, require_break)

        # If the flow- or anti-dependences are not coupled, one or more Clusters
        # might be scheduled separately, to increase parallelism (this is basically
        # what low-level compilers call "loop fission")
        for n, _ in enumerate(clusters):
            d_cross = scope.d_from_access(scope.a_query(n, 'R')).cross()
            if any(d.is_storage_volatile(candidates) for d in d_cross):
                break
            elif d_cross.cause & candidates:
                if n > 0:
                    return self.callback(
                        clusters[:n], prefix, clusters[n:] + backlog,
                        (d_cross.cause & candidates) | known_break)
                break

        # Compute iteration direction
        direction = {
            d: Backward
            for d in candidates if d.root in scope.d_anti.cause
        }
        direction.update(
            {d: Forward
             for d in candidates if d.root in scope.d_flow.cause})
        direction.update(
            {d: Forward
             for d in candidates if d not in direction})

        # Enforce iteration direction on each Cluster
        processed = []
        for c in clusters:
            ispace = IterationSpace(c.ispace.intervals, c.ispace.sub_iterators,
                                    {
                                        **c.ispace.directions,
                                        **direction
                                    })
            processed.append(Cluster(c.exprs, ispace, c.dspace))

        if not backlog:
            return processed

        # Handle the backlog -- the Clusters characterized by flow- and anti-dependences
        # along one or more Dimensions
        direction = {d: Any for d in known_break}
        for i, c in enumerate(list(backlog)):
            ispace = IterationSpace(c.ispace.intervals.lift(known_break),
                                    c.ispace.sub_iterators, {
                                        **c.ispace.directions,
                                        **direction
                                    })
            backlog[i] = Cluster(c.exprs, ispace, c.dspace)

        return processed + self.callback(backlog, prefix)
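
The comment above likens this split to "loop fission". A plain-Python illustration of the transformation, independent of Devito: when the statements in one loop have no dependences tying them together, splitting them into separate loops can expose extra parallelism.

import numpy as np

a, b = np.zeros(8), np.zeros(8)

# Fused form: one loop computing two unrelated statements
for i in range(8):
    a[i] = i * 2.0
    b[i] = i + 1.0

# Fissioned form: two independent loops, each schedulable on its own
for i in range(8):
    a[i] = i * 2.0
for i in range(8):
    b[i] = i + 1.0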
Example #11
    def _fetch_scope(self, clusters):
        exprs = flatten(c.exprs for c in as_tuple(clusters))
        key = tuple(exprs)
        if key not in self.state.scopes:
            self.state.scopes[key] = Scope(exprs)
        return self.state.scopes[key]