def build_iterators(mapper): """ Given M as produced by :func:`detect_accesses`, return a mapper ``M' : D -> V``, where D is the set of Dimensions in M, and V is a set of DerivedDimensions. M'[d] provides the sub-iterators along the Dimension `d`. """ iterators = OrderedDict() for k, v in mapper.items(): for d, offs in v.items(): if d.is_Stepping: values = iterators.setdefault(d.parent, []) for i in sorted(offs): md = ModuloDimension(d, d.root + i, k._time_size, origin=d + i) if md not in values: values.append(md) elif d.is_Conditional: # There are no iterators associated to a ConditionalDimension continue else: iterators.setdefault(d, []) return {k: tuple(v) for k, v in iterators.items()}
def build_iterators(mapper): """ Given M as produced by :func:`detect_accesses`, return a mapper ``M' : D -> V``, where D is the set of Dimensions in M, and V is a set of DerivedDimensions. M'[d] provides the sub-iterators along the Dimension `d`. """ iterators = OrderedDict() for k, v in mapper.items(): for d, offs in v.items(): if d.is_Stepping: sub_iterators = iterators.setdefault(d.parent, set()) sub_iterators.update({ModuloDimension(d, i, k._time_size) for i in offs}) elif d.is_Conditional: # There are no iterators associated to a ConditionalDimension continue else: iterators.setdefault(d, set()) return {k: tuple(v) for k, v in iterators.items()}
def callback(self, clusters, prefix):
    if not prefix:
        return clusters

    d = prefix[-1].dim

    subiters = flatten([c.ispace.sub_iterators.get(d, []) for c in clusters])
    subiters = {i for i in subiters if i.is_Stepping}
    if not subiters:
        return clusters

    # Collect the index access functions along `d`, e.g., `t + 1` where `t` is
    # a SteppingDimension for `d = time`
    mapper = DefaultOrderedDict(lambda: DefaultOrderedDict(set))
    for c in clusters:
        indexeds = [a.indexed for a in c.scope.accesses if a.function.is_Tensor]

        for i in indexeds:
            try:
                iaf = i.indices[d]
            except KeyError:
                continue

            # Sanity checks
            sis = iaf.free_symbols & subiters
            if len(sis) == 0:
                continue
            elif len(sis) == 1:
                si = sis.pop()
            else:
                raise InvalidOperator("Cannot use multiple SteppingDimensions "
                                      "to index into a Function")
            size = i.function.shape_allocated[d]
            assert is_integer(size)

            mapper[size][si].add(iaf)

    # Construct the ModuloDimensions
    mds = []
    for size, v in mapper.items():
        for si, iafs in list(v.items()):
            # Offsets are sorted so that the semantic order (t0, t1, t2) follows
            # SymPy's index ordering (t, t-1, t+1) after modulo replacement, so
            # that associativity errors are consistent. This corresponds to
            # sorting the offsets {-1, 0, 1} as {0, -1, 1}, with 0 mapped to -inf
            siafs = sorted(iafs, key=lambda i: -np.inf if i - si == 0 else (i - si))

            for iaf in siafs:
                name = '%s%d' % (si.name, len(mds))
                offset = uxreplace(iaf, {si: d.root})
                mds.append(ModuloDimension(name, si, offset, size, origin=iaf))

    # Replacement rule for ModuloDimensions
    def rule(size, e):
        try:
            return e.function.shape_allocated[d] == size
        except (AttributeError, KeyError):
            return False

    # Reconstruct the Clusters
    processed = []
    for c in clusters:
        # Apply substitutions to expressions
        # Note: In an expression, there could be `u[t+1, ...]` and `v[t+1,
        # ...]`, where `u` and `v` are TimeFunctions with circular time
        # buffers (save=None) *but* different modulo extent. The `t+1`
        # indices above are therefore conceptually different, so they will
        # be replaced with the proper ModuloDimension through two different
        # calls to `xreplace_indices`
        exprs = c.exprs
        groups = as_mapper(mds, lambda d: d.modulo)
        for size, v in groups.items():
            mapper = {md.origin: md for md in v}
            func = partial(xreplace_indices, mapper=mapper, key=partial(rule, size))
            exprs = [e.apply(func) for e in exprs]

        # Augment IterationSpace
        ispace = IterationSpace(c.ispace.intervals,
                                {**c.ispace.sub_iterators, **{d: tuple(mds)}},
                                c.ispace.directions)

        processed.append(c.rebuild(exprs=exprs, ispace=ispace))

    return processed
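# A minimal sketch (plain SymPy; all names are hypothetical, this is not
# Devito code) of the rewrite the callback above performs: the index access
# functions `t-1`, `t`, `t+1` are sorted as {0, -1, +1} (origin first), each
# is bound to a fresh modulo symbol, and the symbols are then substituted
# into the expressions.
import numpy as np
import sympy

t, t0, t1, t2 = sympy.symbols('t t0 t1 t2')
u = sympy.Function('u')

iafs = [t + 1, t - 1, t]

# Same sorting rule as `siafs` above: the zero offset sorts first
siafs = sorted(iafs, key=lambda i: -np.inf if i - t == 0 else float(i - t))
assert siafs == [t, t - 1, t + 1]

# Bind each index function to a stand-in for `ModuloDimension(origin=iaf)`
mapper = {iaf: sympy.Symbol('t%d' % n) for n, iaf in enumerate(siafs)}

expr = u(t + 1) - 2*u(t) + u(t - 1)
assert expr.xreplace(mapper) == u(t2) - 2*u(t0) + u(t1)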
def _optimize_schedule_rotations(schedule, sregistry):
    """
    Transform the schedule such that the tensor temporaries "rotate" along
    the outermost Dimension. This trades a parallel Dimension for a smaller
    working set size.
    """
    # The rotations Dimension is the outermost
    ridx = 0

    rmapper = defaultdict(list)
    processed = []
    for k, group in groupby(schedule, key=lambda i: i.writeto):
        g = list(group)

        candidate = k[ridx]
        d = candidate.dim
        try:
            ds = schedule.dmapper[d]
        except KeyError:
            # Can't do anything if `d` isn't an IncrDimension over a block
            processed.extend(g)
            continue

        n = candidate.min_size
        assert n > 0

        iis = candidate.lower
        iib = candidate.upper

        ii = ModuloDimension('%sii' % d, ds, iis, incr=iib)
        cd = CustomDimension(name='%s%s' % (d, d), symbolic_min=ii,
                             symbolic_max=iib, symbolic_size=n)
        dsi = ModuloDimension('%si' % ds, cd, cd + ds - iis, n)

        mapper = OrderedDict()
        for i in g:
            # Update `indicess` to use `xs0`, `xs1`, ...
            mds = []
            for indices in i.indicess:
                v = indices[ridx]
                try:
                    md = mapper[v]
                except KeyError:
                    name = sregistry.make_name(prefix='%sr' % d.name)
                    md = mapper.setdefault(v, ModuloDimension(name, ds, v, n))
                mds.append(md)
            indicess = [indices[:ridx] + [md] + indices[ridx + 1:]
                        for md, indices in zip(mds, i.indicess)]

            # Update `writeto` by switching `d` to `dsi`
            intervals = k.intervals.switch(d, dsi).zero(dsi)
            sub_iterators = dict(k.sub_iterators)
            sub_iterators[d] = dsi
            writeto = IterationSpace(intervals, sub_iterators)

            # Transform `alias` by adding `i`
            alias = i.alias.xreplace({d: d + cd})

            # Extend `ispace` to iterate over rotations
            d1 = writeto[ridx + 1].dim  # Note: we're by construction in-bounds here
            intervals = IntervalGroup(Interval(cd, 0, 0), relations={(d, cd, d1)})
            rispace = IterationSpace(intervals, {cd: dsi}, {cd: Forward})
            aispace = i.ispace.zero(d)
            aispace = aispace.augment({d: mds + [ii]})
            ispace = IterationSpace.union(rispace, aispace)

            processed.append(
                ScheduledAlias(alias, writeto, ispace, i.aliaseds, indicess))

        # Update the rotations mapper
        rmapper[d].extend(list(mapper.values()))

    return Schedule(*processed, dmapper=schedule.dmapper, rmapper=rmapper)
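# A minimal, self-contained sketch (plain NumPy; all names and sizes are
# hypothetical, this is not Devito code) of the working-set reduction that
# rotation buys: rather than materializing a temporary plane for every
# iteration of the outermost Dimension, keep only `n` planes and index them
# modulo `n` as the outer loop advances.
import numpy as np

nx, ny, n = 8, 8, 2                # `n` plays the role of `candidate.min_size`
u = np.random.rand(nx + 1, ny)
tmp = np.empty((n, ny))            # working set: n rotating planes, not nx

out = np.empty((nx, ny))
for x in range(nx + 1):
    tmp[x % n] = 2.0 * u[x]        # produce plane `x` into slot `x % n`
    if x >= 1:
        # consume planes `x-1` and `x` straight from the rotating buffer
        out[x - 1] = tmp[(x - 1) % n] + tmp[x % n]

# Reference: the same computation with the full (non-rotated) temporary
full = 2.0 * u
assert np.allclose(out, full[:-1] + full[1:])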
def __init__(self, function, contracted_dims, accessv, options, sregistry,
             bds=None, mds=None):
    # Parse compilation options
    async_degree = options['buf-async-degree']
    space = options['buf-mem-space']
    dtype = options['buf-dtype'](function)

    self.function = function
    self.accessv = accessv

    self.contraction_mapper = {}
    self.index_mapper = defaultdict(dict)
    self.sub_iterators = defaultdict(list)
    self.subdims_mapper = DefaultOrderedDict(set)

    # Create the necessary ModuloDimensions for indexing into the buffer
    # E.g., `u[time,x] + u[time+1,x]` -> `ub[sb0,x] + ub[sb1,x]`, where `sb0`
    # and `sb1` are ModuloDimensions starting at `time` and `time+1`, respectively
    dims = list(function.dimensions)
    for d in contracted_dims:
        assert d in function.dimensions

        # Determine the buffer size, and therefore the span of the
        # ModuloDimension, along the contracting Dimension `d`
        indices = filter_ordered(i.indices[d] for i in accessv.accesses)
        slots = [i.subs({d: 0, d.spacing: 1}) for i in indices]
        try:
            size = max(slots) - min(slots) + 1
        except TypeError:
            # E.g., special case `slots=[-1 + time/factor, 2 + time/factor]`
            # Resort to the fast vector-based comparison machinery (rather than
            # the slower sympy.simplify)
            slots = [Vector(i) for i in slots]
            size = int((vmax(*slots) - vmin(*slots) + 1)[0])

        if async_degree is not None:
            if async_degree < size:
                warning("Ignoring provided asynchronous degree as it'd be "
                        "too small for the required buffer (provided %d, "
                        "but need at least %d for `%s`)"
                        % (async_degree, size, function.name))
            else:
                size = async_degree

        # Replace `d` with a suitable CustomDimension `bd`
        name = sregistry.make_name(prefix='db')
        bd = bds.setdefault((d, size), CustomDimension(name, 0, size-1, size, d))
        self.contraction_mapper[d] = dims[dims.index(d)] = bd

        # Finally create the ModuloDimensions as children of `bd`
        if size > 1:
            # Note: indices are sorted so that the semantic order (sb0, sb1, sb2)
            # follows SymPy's index ordering (time, time-1, time+1) after modulo
            # replacement, so that associativity errors are consistent. This very
            # same strategy is also applied in clusters/algorithms/Stepper
            p, _ = offset_from_centre(d, indices)
            indices = sorted(indices,
                             key=lambda i: -np.inf if i - p == 0 else (i - p))
            for i in indices:
                name = sregistry.make_name(prefix='sb')
                md = mds.setdefault((bd, i), ModuloDimension(name, bd, i, size))
                self.index_mapper[d][i] = md
                self.sub_iterators[d.root].append(md)
        else:
            assert len(indices) == 1
            self.index_mapper[d][indices[0]] = 0

    # Track the SubDimensions used to index into `function`
    for e in accessv.mapper:
        m = {i.root: i for i in e.free_symbols
             if isinstance(i, Dimension) and (i.is_Sub or not i.is_Derived)}
        for d, v in m.items():
            self.subdims_mapper[d].add(v)
    if any(len(v) > 1 for v in self.subdims_mapper.values()):
        # Non-uniform SubDimensions. At this point we're going to raise
        # an exception. It's either illegal or still unsupported
        for v in self.subdims_mapper.values():
            for d0, d1 in combinations(v, 2):
                if d0.overlap(d1):
                    raise InvalidOperator("Cannot apply `buffering` to `%s` as it "
                                          "is accessed over the overlapping "
                                          "SubDimensions `<%s, %s>`"
                                          % (function, d0, d1))
        raise NotImplementedError("`buffering` does not support multiple "
                                  "non-overlapping SubDimensions yet")
    else:
        self.subdims_mapper = {d: v.pop() for d, v in self.subdims_mapper.items()}

    # Build and sanity-check the buffer IterationIntervals
    self.itintervals_mapper = {}
    for e in accessv.mapper:
        for i in e.ispace.itintervals:
            v = self.itintervals_mapper.setdefault(i.dim, i.args)
            if v != i.args:
                raise NotImplementedError("Cannot apply `buffering` as the buffered "
                                          "function `%s` is accessed over multiple, "
                                          "non-compatible iteration spaces along the "
                                          "Dimension `%s`" % (function.name, i.dim))
    # Also add IterationIntervals for initialization along `x`, should `xi` be
    # the only written Dimension in the `x` hierarchy
    for d, (interval, _, _) in list(self.itintervals_mapper.items()):
        for i in d._defines:
            self.itintervals_mapper.setdefault(i, (interval.relaxed, (), Forward))

    # Finally create the actual buffer
    self.buffer = Array(name=sregistry.make_name(prefix='%sb' % function.name),
                        dimensions=dims,
                        dtype=dtype,
                        halo=function.halo,
                        space=space)
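# A minimal sketch (plain SymPy; all names are hypothetical, this is not
# Devito code) of the buffer-size computation at the top of the constructor:
# the distinct access indices along the contracted Dimension are normalized,
# and the buffer span is `max - min + 1`.
import sympy

time, h = sympy.symbols('time h')  # `h` stands in for `d.spacing`

# Accesses u[time-1], u[time], u[time+1] along the contracted Dimension
indices = [time - h, time, time + h]
slots = [i.subs({time: 0, h: 1}) for i in indices]

size = max(slots) - min(slots) + 1
assert size == 3  # a 3-slot circular buffer covers the access pattern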