Example no. 1
def _loop_wrapping(self, iet):
    """
    Emit a performance message if WRAPPABLE Iterations are found,
    as these are a symptom that unnecessary memory is being allocated.
    """
    for i in FindNodes(Iteration).visit(iet):
        if not i.is_Wrappable:
            continue
        perf_adv("Functions using modulo iteration along Dimension `%s` "
                 "may safely allocate a one slot smaller buffer" % i.dim)
    return iet, {}
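
To make the advice concrete, here is a minimal, self-contained sketch (plain NumPy, not Devito internals) of the situation the message describes: a stencil update that reads time slot `t` and writes time slot `t + 1` through modulo ("wrapping") indexing only ever needs two live time slots, so a buffer sized with one extra slot can safely shrink by one.

import numpy as np

nt, nx = 10, 100
u = np.zeros((2, nx))  # two slots suffice -- the "one slot smaller" buffer

for t in range(nt - 1):
    cur, nxt = t % 2, (t + 1) % 2
    # Read slot `cur`, write slot `nxt`; the slot being overwritten is
    # never read again, which is what makes the Iteration WRAPPABLE
    u[nxt, 1:-1] = 0.5 * (u[cur, :-2] + u[cur, 2:])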
Example no. 2
def optimize_halospots(iet):
    """
    Optimize the HaloSpots in ``iet``.

    * Remove all ``useless`` HaloSpots;
    * Merge all ``hoistable`` HaloSpots with their root HaloSpot, thus
      removing redundant communications and anticipating communications
      that will be required by later Iterations.
    """
    # Drop `useless` HaloSpots
    mapper = {
        hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(hs.useless))
        for hs in FindNodes(HaloSpot).visit(iet)
    }
    iet = Transformer(mapper, nested=True).visit(iet)

    # Handle `hoistable` HaloSpots
    # First, we merge `hoistable` HaloSpots together, to anticipate communications
    mapper = {}
    for tree in retrieve_iteration_tree(iet):
        halo_spots = FindNodes(HaloSpot).visit(tree.root)
        if not halo_spots:
            continue
        root = halo_spots[0]
        if root in mapper:
            continue
        hss = [root.halo_scheme]
        hss.extend(
            [hs.halo_scheme.project(hs.hoistable) for hs in halo_spots[1:]])
        try:
            mapper[root] = root._rebuild(halo_scheme=HaloScheme.union(hss))
        except ValueError:
            # HaloSpots have non-matching `loc_indices` and therefore can't be merged
            perf_adv("Found hoistable HaloSpots with disjoint loc_indices, "
                     "skipping optimization")
            continue
        for hs in halo_spots[1:]:
            halo_scheme = hs.halo_scheme.drop(hs.hoistable)
            if halo_scheme.is_void:
                mapper[hs] = hs.body
            else:
                mapper[hs] = hs._rebuild(halo_scheme=halo_scheme)
    iet = Transformer(mapper, nested=True).visit(iet)

    # Then, we make sure the halo exchanges get performed *before*
    # the first distributed Dimension. Again, we do this to anticipate
    # communications, which hopefully pays off in performance
    #
    # <Iteration x>                    <HaloSpot(u)>, in y
    #   <HaloSpot(u)>, in y    ---->   <Iteration x>
    #   <Iteration y>                    <Iteration y>
    mapper = {}
    for i, halo_spots in MapNodes(Iteration, HaloSpot).visit(iet).items():
        hoistable = [hs for hs in halo_spots if hs.hoistable]
        if not hoistable:
            continue
        elif len(hoistable) > 1:
            # We should never end up here, but for now we can't prove it formally
            perf_adv(
                "Found multiple hoistable HaloSpots, skipping optimization")
            continue
        hs = hoistable.pop()
        if hs in mapper:
            continue
        if i.dim.root in hs.dimensions:
            halo_scheme = hs.halo_scheme.drop(hs.hoistable)
            if halo_scheme.is_void:
                mapper[hs] = hs.body
            else:
                mapper[hs] = hs._rebuild(halo_scheme=halo_scheme)

            halo_scheme = hs.halo_scheme.project(hs.hoistable)
            mapper[i] = hs._rebuild(halo_scheme=halo_scheme, body=i._rebuild())
    iet = Transformer(mapper, nested=True).visit(iet)

    # Finally, we try to move HaloSpot-free Iteration nests within HaloSpot
    # subtrees, to overlap as much computation as possible. The HaloSpot-free
    # Iteration nests must be fully affine, otherwise we wouldn't be able to
    # honour the data dependences along the halo
    #
    # <HaloSpot(u,v)>            <HaloSpot(u,v)>
    #   <A>             ---->      <A>
    # <B>              affine?     <B>
    #
    # Here, <B> doesn't require any halo exchange, but it might still need the
    # output of <A>; thus, if we do computation/communication overlap over <A>
    # *and* want to embed <B> within the HaloSpot, then <B>'s iteration space
    # will have to be split as well. For this, <B> must be affine.
    mapper = {}
    for v in FindAdjacent((HaloSpot, Iteration)).visit(iet).values():
        for g in v:
            root = None
            for i in g:
                if i.is_HaloSpot:
                    root = i
                    mapper[root] = [root.body]
                elif root and all(j.is_Affine
                                  for j in FindNodes(Iteration).visit(i)):
                    mapper[root].append(i)
                    mapper[i] = None
                else:
                    root = None
    mapper = {
        k: k._rebuild(body=List(body=v)) if v else v
        for k, v in mapper.items()
    }
    iet = Transformer(mapper).visit(iet)

    return iet, {}
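
All three rewriting steps above share the same idiom: build a `node -> replacement` mapper (with a node's body, or nothing at all, standing in for removal), then apply it in a single tree rebuild. The toy sketch below, using a hypothetical `Node` class and `transform` function rather than Devito's actual `Transformer`, shows the shape of that pattern.

class Node:
    """A bare-bones tree node standing in for an IET node."""
    def __init__(self, tag, children=()):
        self.tag = tag
        self.children = list(children)

    def rebuild(self, children):
        return Node(self.tag, children)

def transform(node, mapper):
    """Rebuild the tree honouring `node -> replacement` rules;
    a `None` replacement drops the node and its subtree."""
    if node in mapper:
        node = mapper[node]
        if node is None:
            return None
    children = [c for c in (transform(c, mapper) for c in node.children)
                if c is not None]
    return node.rebuild(children)

# Usage: drop every node tagged "useless", akin to the first step above
root = Node("root", [Node("useless"), Node("keep")])
mapper = {n: None for n in root.children if n.tag == "useless"}
root = transform(root, mapper)
assert [c.tag for c in root.children] == ["keep"]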
Example no. 3
def process(candidates, aliases, cluster, template):
    """
    Create Clusters from aliasing expressions.
    """
    clusters = []
    subs = {}
    for origin, alias in aliases.items():
        if all(i not in candidates for i in alias.aliased):
            continue

        # The write-to Intervals
        writeto = [
            Interval(i.dim, *alias.relaxed_diameter.get(i.dim, (0, 0)))
            for i in cluster.ispace.intervals if not i.dim.is_Time
        ]
        writeto = IntervalGroup(writeto)

        # Optimization: no need to retain a SpaceDimension if it does not
        # induce a flow/anti dependence (below, `i.offsets` captures this, by
        # telling how much halo will be required to honour such dependences)
        dep_inducing = [i for i in writeto if any(i.offsets)]
        try:
            index = writeto.index(dep_inducing[0])
            writeto = IntervalGroup(writeto[index:])
        except IndexError:
            perf_adv("Could not optimize some of the detected redundancies")

        # Create a temporary to store `alias`
        dimensions = [d.root for d in writeto.dimensions]
        halo = [(abs(i.lower), abs(i.upper)) for i in writeto]
        array = Array(name=template(),
                      dimensions=dimensions,
                      halo=halo,
                      dtype=cluster.dtype)

        # Build up the expression evaluating `alias`
        access = tuple(i.dim - i.lower for i in writeto)
        expression = Eq(array[access], origin.xreplace(subs))

        # Create the substitution rules so that we can use the newly created
        # temporary in place of the aliasing expressions
        for aliased, distance in alias.with_distance:
            assert all(i.dim in distance.labels for i in writeto)
            access = [i.dim - i.lower + distance[i.dim] for i in writeto]
            if aliased in candidates:
                # It would *not* be in `candidates` if part of a composite alias
                subs[candidates[aliased]] = array[access]
            subs[aliased] = array[access]

        # Construct the `alias` IterationSpace
        intervals, sub_iterators, directions = cluster.ispace.args
        ispace = IterationSpace(intervals.add(writeto), sub_iterators,
                                directions)

        # Optimize the `alias` IterationSpace: if possible, the innermost
        # IterationInterval is rounded up to a multiple of the vector length
        try:
            it = ispace.itintervals[-1]
            if ROUNDABLE in cluster.properties[it.dim]:
                from devito.parameters import configuration
                vl = configuration['platform'].simd_items_per_reg(
                    cluster.dtype)
                ispace = ispace.add(Interval(it.dim, 0, it.interval.size % vl))
        except (TypeError, KeyError):
            pass

        # Construct the `alias` DataSpace
        mapper = detect_accesses(expression)
        parts = {
            k: IntervalGroup(build_intervals(v)).add(ispace.intervals)
            for k, v in mapper.items() if k
        }
        dspace = DataSpace(cluster.dspace.intervals, parts)

        # Create a new Cluster for `alias`
        clusters.append(
            cluster.rebuild(exprs=[expression], ispace=ispace, dspace=dspace))

    return clusters, subs
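
The substitution rules built mid-loop are the crux of the pass: each aliasing expression is rewritten as a shifted access into the newly created temporary. Below is a hedged, self-contained illustration of that rewriting in plain SymPy (toy `f`, `g`, `tmp` bases, not Devito's `Array`/`Cluster` types).

from sympy import Eq, IndexedBase, symbols

x = symbols('x')
f, g, tmp = IndexedBase('f'), IndexedBase('g'), IndexedBase('tmp')

# `f[x] + g[x]` plays the role of `origin`; `f[x+1] + g[x+1]` aliases
# it at distance 1 along x
origin = f[x] + g[x]
expression = Eq(tmp[x], origin)  # evaluate the alias once, like `Eq(array[access], ...)`

# Rules mirroring `subs[aliased] = array[access]`
subs = {f[x] + g[x]: tmp[x],
        f[x + 1] + g[x + 1]: tmp[x + 1]}

expr = (f[x] + g[x]) * (f[x + 1] + g[x + 1])
print(expr.xreplace(subs))  # -> tmp[x]*tmp[x + 1]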