示例#1
0
    def __init__(self, expressions):
        mapper = DefaultOrderedDict(lambda: DefaultOrderedDict(AccessTuple))
        for e in expressions:
            for i in retrieve_function_carriers(e.rhs):
                mapper[i.function][e].reads.append(i)
            mapper[e.lhs.function][e].write = e.lhs

        super().__init__([(f, AccessValue(f, mapper[f])) for f in mapper])
示例#2
0
文件: graph.py 项目: nw0/devito
    def __init__(self, exprs, **kwargs):
        # Check input legality
        mapper = OrderedDict([(i.lhs, i) for i in exprs])
        if len(set(mapper)) != len(mapper):
            raise DSEException(
                "Found redundant node, cannot build TemporariesGraph.")

        # Construct Temporaries, tracking reads and readby
        tensor_map = DefaultOrderedDict(list)
        for i in mapper:
            tensor_map[as_symbol(i)].append(i)
        reads = DefaultOrderedDict(set)
        readby = DefaultOrderedDict(set)
        for k, v in mapper.items():
            handle = retrieve_terminals(v.rhs)
            for i in list(handle):
                if i.is_Indexed:
                    for idx in i.indices:
                        handle |= retrieve_terminals(idx)
            reads[k].update(
                set(flatten([tensor_map.get(as_symbol(i), [])
                             for i in handle])))
            for i in reads[k]:
                readby[i].add(k)

        # Make sure read-after-writes are honored for scalar temporaries
        processed = [i for i in mapper if i.is_Indexed]
        queue = [i for i in mapper if i not in processed]
        while queue:
            k = queue.pop(0)
            if not readby[k]:
                processed.insert(0, k)
            elif all(i in processed for i in readby[k]):
                index = min(processed.index(i) for i in readby[k])
                processed.insert(index, k)
            else:
                queue.append(k)

        # Build up the TemporariesGraph
        temporaries = [(i,
                        Temporary(*mapper[i].args,
                                  inc=q_inc(mapper[i]),
                                  reads=reads[i],
                                  readby=readby[i])) for i in processed]
        super(TemporariesGraph, self).__init__(temporaries, **kwargs)

        # Determine indices along the space and time dimensions
        terms = [
            v for k, v in self.items() if v.is_tensor and not q_indirect(k)
        ]
        indices = filter_ordered(flatten([i.function.indices for i in terms]))
        self.space_indices = tuple(i for i in indices if i.is_Space)
        self.time_indices = tuple(i for i in indices if i.is_Time)
示例#3
0
文件: graph.py 项目: woxin5295/devito
    def __init__(self, exprs, **kwargs):
        # Always convert to SSA
        exprs = convert_to_SSA(exprs)
        mapper = OrderedDict([(i.lhs, i) for i in exprs])
        assert len(set(mapper)) == len(exprs), "not SSA Cluster?"

        # Construct the Nodes, tracking reads and readby
        tensor_map = DefaultOrderedDict(list)
        for i in mapper:
            tensor_map[as_symbol(i)].append(i)
        reads = DefaultOrderedDict(set)
        readby = DefaultOrderedDict(set)
        for k, v in mapper.items():
            handle = retrieve_terminals(v.rhs)
            for i in list(handle):
                if i.is_Indexed:
                    for idx in i.indices:
                        handle |= retrieve_terminals(idx)
            reads[k].update(
                set(flatten([tensor_map.get(as_symbol(i), [])
                             for i in handle])))
            for i in reads[k]:
                readby[i].add(k)

        # Make sure read-after-writes are honored for scalar temporaries
        processed = [i for i in mapper if i.is_Indexed]
        queue = [i for i in mapper if i not in processed]
        while queue:
            k = queue.pop(0)
            if not readby[k]:
                processed.insert(0, k)
            elif all(i in processed for i in readby[k]):
                index = min(processed.index(i) for i in readby[k])
                processed.insert(index, k)
            else:
                queue.append(k)

        # Build up the FlowGraph
        temporaries = [(i,
                        Node(*mapper[i].args,
                             inc=q_inc(mapper[i]),
                             reads=reads[i],
                             readby=readby[i])) for i in processed]
        super(FlowGraph, self).__init__(temporaries, **kwargs)

        # Determine indices along the space and time dimensions
        terms = [
            v for k, v in self.items() if v.is_tensor and not q_indirect(k)
        ]
        indices = filter_ordered(flatten([i.function.indices for i in terms]))
        self.space_indices = tuple(i for i in indices if i.is_Space)
        self.time_indices = tuple(i for i in indices if i.is_Time)
示例#4
0
文件: graph.py 项目: opesci/devito
    def __init__(self, exprs, **kwargs):
        # Always convert to SSA
        exprs = makeit_ssa(exprs)
        mapper = OrderedDict([(i.lhs, i) for i in exprs])
        assert len(set(mapper)) == len(exprs), "not SSA Cluster?"

        # Construct the Nodes, tracking reads and readby
        tensor_map = DefaultOrderedDict(list)
        for i in mapper:
            tensor_map[as_symbol(i)].append(i)
        reads = DefaultOrderedDict(set)
        readby = DefaultOrderedDict(set)
        for k, v in mapper.items():
            handle = retrieve_terminals(v.rhs)
            for i in list(handle):
                if i.is_Indexed:
                    for idx in i.indices:
                        handle |= retrieve_terminals(idx)
            reads[k].update(set(flatten([tensor_map.get(as_symbol(i), [])
                                         for i in handle])))
            for i in reads[k]:
                readby[i].add(k)

        # Make sure read-after-writes are honored for scalar nodes
        processed = [i for i in mapper if i.is_Indexed]
        queue = [i for i in mapper if i not in processed]
        while queue:
            k = queue.pop(0)
            if not readby[k] or k in readby[k]:
                processed.insert(0, k)
            elif all(i in processed for i in readby[k]):
                index = min(processed.index(i) for i in readby[k])
                processed.insert(index, k)
            else:
                queue.append(k)

        # Build up the FlowGraph
        nodes = [(i, Node(mapper[i], reads=reads[i], readby=readby[i]))
                 for i in processed]
        super(FlowGraph, self).__init__(nodes, **kwargs)

        # Determine indices along the space and time dimensions
        terms = [v for k, v in self.items() if v.is_Tensor and not q_indirect(k)]
        indices = filter_ordered(flatten([i.function.indices for i in terms]))
        self.space_indices = tuple(i for i in indices if i.is_Space)
        self.time_indices = tuple(i for i in indices if i.is_Time)
示例#5
0
文件: graph.py 项目: jrt54/devito
    def __init__(self, exprs, **kwargs):
        # Always convert to SSA
        exprs = makeit_ssa(exprs)
        mapper = OrderedDict([(i.lhs, i) for i in exprs])
        assert len(set(mapper)) == len(exprs), "not SSA Cluster?"

        # Construct the Nodes, tracking reads and readby
        tensor_map = DefaultOrderedDict(list)
        for i in mapper:
            tensor_map[as_symbol(i)].append(i)
        reads = DefaultOrderedDict(set)
        readby = DefaultOrderedDict(set)
        for k, v in mapper.items():
            handle = retrieve_terminals(v.rhs, deep=True)
            reads[k].update(
                set(flatten([tensor_map.get(as_symbol(i), [])
                             for i in handle])))
            for i in reads[k]:
                readby[i].add(k)

        # Make sure read-after-writes are honored for scalar nodes
        processed = [i for i in mapper if i.is_Indexed]
        queue = [i for i in mapper if i not in processed]
        while queue:
            k = queue.pop(0)
            if not readby[k] or k in readby[k]:
                processed.insert(0, k)
            elif all(i in processed for i in readby[k]):
                index = min(processed.index(i) for i in readby[k])
                processed.insert(index, k)
            else:
                queue.append(k)

        # Build up the FlowGraph
        nodes = [(i, Node(mapper[i], reads=reads[i], readby=readby[i]))
                 for i in processed]
        super(FlowGraph, self).__init__(nodes, **kwargs)
示例#6
0
def actions_from_unstructured(clusters, key, prefix, actions):
    it = prefix[-1]
    d = it.dim
    direction = it.direction

    # Locate the streamable Functions
    first_seen = {}
    last_seen = {}
    for c in clusters:
        candidates = key(c)
        if not candidates:
            continue
        for i in c.scope.accesses:
            f = i.function
            if f in candidates:
                k = (f, i[d])
                first_seen.setdefault(k, c)
                last_seen[k] = c
    if not first_seen:
        return clusters

    callbacks = [(frozendict(first_seen), FetchPrefetch),
                 (frozendict(last_seen), Delete)]

    # Create and map SyncOps to Clusters
    for seen, callback in callbacks:
        mapper = defaultdict(lambda: DefaultOrderedDict(list))
        for (f, v), c in seen.items():
            mapper[c][f].append(v)

        for c, m in mapper.items():
            for f, v in m.items():
                for fetch, s in indices_to_sections(v):
                    if direction is Forward:
                        ifetch = fetch.subs(d, d.symbolic_min)
                        fcond = make_cond(c.guards.get(d), d, direction, d.symbolic_min)
                        pfetch = fetch + 1
                        pcond = make_cond(c.guards.get(d), d, direction, d + 1)
                    else:
                        ifetch = fetch.subs(d, d.symbolic_max)
                        fcond = make_cond(c.guards.get(d), d, direction, d.symbolic_max)
                        pfetch = fetch - 1
                        pcond = make_cond(c.guards.get(d), d, direction, d - 1)

                    syncop = callback(d, s, f, fetch, ifetch, fcond, pfetch, pcond)
                    actions[c].syncs[d].append(syncop)
示例#7
0
def stree_schedule(clusters):
    """
    Arrange an iterable of Clusters into a ScheduleTree.
    """
    stree = ScheduleTree()

    prev = None
    mapper = DefaultOrderedDict(lambda: Bunch(top=None, bottom=None))

    def attach_metadata(cluster, d, tip):
        if d in cluster.guards:
            tip = NodeConditional(cluster.guards[d], tip)
        if d in cluster.syncs:
            tip = NodeSync(cluster.syncs[d], tip)
        return tip

    for c in clusters:
        # Add in any Conditionals and Syncs outside of the outermost Iteration
        tip = attach_metadata(c, None, stree)

        if tip is stree:
            pointers = list(mapper)
        else:
            pointers = []

        index = 0
        for it0, it1 in zip(c.itintervals, pointers):
            if it0 != it1:
                break
            index += 1

            d = it0.dim

            # The reused sub-trees might acquire new sub-iterators as well as
            # new properties
            mapper[it0].top.ispace = IterationSpace.union(
                mapper[it0].top.ispace, c.ispace.project([d]))
            mapper[it0].top.properties = normalize_properties(
                mapper[it0].top.properties, c.properties[it0.dim])

            # Different guards or syncops cannot be further nested
            if c.guards.get(d) != prev.guards.get(d) or \
               c.syncs.get(d) != prev.syncs.get(d):
                tip = mapper[it0].top
                tip = attach_metadata(c, d, tip)
                mapper[it0].bottom = tip
                break
            else:
                tip = mapper[it0].bottom

        # Nested sub-trees, instead, will not be used anymore
        for it in pointers[index:]:
            mapper.pop(it)

        # Add in Iterations, Conditionals, and Syncs
        for it in c.itintervals[index:]:
            d = it.dim
            tip = NodeIteration(c.ispace.project([d]), tip,
                                c.properties.get(d))
            mapper[it].top = tip
            tip = attach_metadata(c, d, tip)
            mapper[it].bottom = tip

        # Add in Expressions
        NodeExprs(c.exprs, c.ispace, c.dspace, c.ops, c.traffic, tip)

        # Prepare for next iteration
        prev = c

    return stree
示例#8
0
    def callback(self, clusters, prefix):
        if not prefix:
            return clusters

        it = prefix[-1]
        d = it.dim
        direction = it.direction

        try:
            pd = prefix[-2].dim
        except IndexError:
            pd = None

        # What are the stream-able Dimensions?
        # 0) all sequential Dimensions
        # 1) all CustomDimensions of fixed (i.e. integer) size, which
        #    implies a bound on the amount of streamed data
        if all(SEQUENTIAL in c.properties[d] for c in clusters):
            make_fetch = lambda f, i, s, cb: FetchWaitPrefetch(
                f, d, direction, i, s, cb)
            make_delete = lambda f, i, s, cb: Delete(f, d, direction, i, s, cb)
            syncd = d
        elif d.is_Custom and is_integer(it.size):
            make_fetch = lambda f, i, s, cb: FetchWait(f, d, direction, i, it.
                                                       size, cb)
            make_delete = lambda f, i, s, cb: Delete(f, d, direction, i, it.
                                                     size, cb)
            syncd = pd
        else:
            return clusters

        first_seen = {}
        last_seen = {}
        for c in clusters:
            candidates = self.key(c)
            if not candidates:
                continue
            for i in c.scope.accesses:
                f = i.function
                if f in candidates:
                    k = (f, i[d])
                    first_seen.setdefault(k, c)
                    last_seen[k] = c

        if not first_seen:
            return clusters

        # Bind fetches and deletes to Clusters
        sync_ops = defaultdict(list)
        callbacks = [(frozendict(first_seen), make_fetch),
                     (frozendict(last_seen), make_delete)]
        for seen, callback in callbacks:
            mapper = defaultdict(lambda: DefaultOrderedDict(list))
            for (f, v), c in seen.items():
                mapper[c][f].append(v)
            for c, m in mapper.items():
                for f, v in m.items():
                    for i, s in indices_to_sections(v):
                        next_cbk = make_next_cbk(c.guards.get(d), d, direction)
                        sync_ops[c].append(callback(f, i, s, next_cbk))

        # Attach SyncOps to Clusters
        processed = []
        for c in clusters:
            v = sync_ops.get(c)
            if v is not None:
                processed.append(
                    c.rebuild(syncs=normalize_syncs(c.syncs, {syncd: v})))
            else:
                processed.append(c)

        return processed
示例#9
0
    def callback(self, clusters, prefix):
        if not prefix:
            return clusters

        d = prefix[-1].dim

        subiters = flatten(
            [c.ispace.sub_iterators.get(d, []) for c in clusters])
        subiters = {i for i in subiters if i.is_Stepping}
        if not subiters:
            return clusters

        # Collect the index access functions along `d`, e.g., `t + 1` where `t` is
        # a SteppingDimension for `d = time`
        mapper = DefaultOrderedDict(lambda: DefaultOrderedDict(set))
        for c in clusters:
            indexeds = [
                a.indexed for a in c.scope.accesses if a.function.is_Tensor
            ]

            for i in indexeds:
                try:
                    iaf = i.indices[d]
                except KeyError:
                    continue

                # Sanity checks
                sis = iaf.free_symbols & subiters
                if len(sis) == 0:
                    continue
                elif len(sis) == 1:
                    si = sis.pop()
                else:
                    raise InvalidOperator(
                        "Cannot use multiple SteppingDimensions "
                        "to index into a Function")
                size = i.function.shape_allocated[d]
                assert is_integer(size)

                mapper[size][si].add(iaf)

        # Construct the ModuloDimensions
        mds = []
        for size, v in mapper.items():
            for si, iafs in list(v.items()):
                # Offsets are sorted so that the semantic order (t0, t1, t2) follows
                # SymPy's index ordering (t, t-1, t+1) afer modulo replacement so
                # that associativity errors are consistent. This corresponds to
                # sorting offsets {-1, 0, 1} as {0, -1, 1} assigning -inf to 0
                siafs = sorted(iafs,
                               key=lambda i: -np.inf
                               if i - si == 0 else (i - si))

                for iaf in siafs:
                    name = '%s%d' % (si.name, len(mds))
                    offset = uxreplace(iaf, {si: d.root})
                    mds.append(
                        ModuloDimension(name, si, offset, size, origin=iaf))

        # Replacement rule for ModuloDimensions
        def rule(size, e):
            try:
                return e.function.shape_allocated[d] == size
            except (AttributeError, KeyError):
                return False

        # Reconstruct the Clusters
        processed = []
        for c in clusters:
            # Apply substitutions to expressions
            # Note: In an expression, there could be `u[t+1, ...]` and `v[t+1,
            # ...]`, where `u` and `v` are TimeFunction with circular time
            # buffers (save=None) *but* different modulo extent. The `t+1`
            # indices above are therefore conceptually different, so they will
            # be replaced with the proper ModuloDimension through two different
            # calls to `xreplace_indices`
            exprs = c.exprs
            groups = as_mapper(mds, lambda d: d.modulo)
            for size, v in groups.items():
                mapper = {md.origin: md for md in v}

                func = partial(xreplace_indices,
                               mapper=mapper,
                               key=partial(rule, size))
                exprs = [e.apply(func) for e in exprs]

            # Augment IterationSpace
            ispace = IterationSpace(c.ispace.intervals, {
                **c.ispace.sub_iterators,
                **{
                    d: tuple(mds)
                }
            }, c.ispace.directions)

            processed.append(c.rebuild(exprs=exprs, ispace=ispace))

        return processed
示例#10
0
 def __init__(self, *args, **kwargs):
     super(PerformanceSummary, self).__init__(*args, **kwargs)
     self.subsections = DefaultOrderedDict(lambda: OrderedDict())
     self.input = OrderedDict()
     self.globals = {}
示例#11
0
def linearize_accesses(iet, key, cache, sregistry):
    """
    Turn Indexeds into FIndexeds and create the necessary access Macros.
    """
    # `functions` are all Functions that `iet` may need to linearize
    functions = [f for f in FindSymbols().visit(iet) if key(f) and f.ndim > 1]
    functions = sorted(functions, key=lambda f: len(f.dimensions), reverse=True)

    # `functions_unseen` are all Functions that `iet` may need to linearize
    # and have not been seen while processing other IETs
    functions_unseen = [f for f in functions if f not in cache]

    # Find unique sizes (unique -> minimize necessary registers)
    mapper = DefaultOrderedDict(list)
    for f in functions:
        # NOTE: the outermost dimension is unnecessary
        for d in f.dimensions[1:]:
            # TODO: same grid + same halo => same padding, however this is
            # never asserted throughout the compiler yet... maybe should do
            # it when in debug mode at `prepare_arguments` time, ie right
            # before jumping to C?
            mapper[(d, f._size_halo[d], getattr(f, 'grid', None))].append(f)

    # For all unseen Functions, build the size exprs. For example:
    # `x_fsz0 = u_vec->size[1]`
    imapper = DefaultOrderedDict(dict)
    for (d, halo, _), v in mapper.items():
        v_unseen = [f for f in v if f in functions_unseen]
        if not v_unseen:
            continue
        expr = _generate_fsz(v_unseen[0], d, sregistry)
        if expr:
            for f in v_unseen:
                imapper[f][d] = expr.write
                cache[f].stmts0.append(expr)

    # For all unseen Functions, build the stride exprs. For example:
    # `y_stride0 = y_fsz0*z_fsz0`
    built = {}
    mapper = DefaultOrderedDict(dict)
    for f, v in imapper.items():
        for d in v:
            n = f.dimensions.index(d)
            expr = prod(v[i] for i in f.dimensions[n:])
            try:
                stmt = built[expr]
            except KeyError:
                name = sregistry.make_name(prefix='%s_stride' % d.name)
                s = Symbol(name=name, dtype=np.int64, is_const=True)
                stmt = built[expr] = DummyExpr(s, expr, init=True)
            mapper[f][d] = stmt.write
            cache[f].stmts1.append(stmt)
    mapper.update([(f, {}) for f in functions_unseen if f not in mapper])

    # For all unseen Functions, build defines. For example:
    # `#define uL(t, x, y, z) u[(t)*t_stride0 + (x)*x_stride0 + (y)*y_stride0 + (z)]`
    headers = []
    findexeds = {}
    for f in functions:
        if cache[f].cbk is None:
            header, cbk = _generate_macro(f, mapper[f], sregistry)
            headers.append(header)
            cache[f].cbk = findexeds[f] = cbk
        else:
            findexeds[f] = cache[f].cbk

    # Build "functional" Indexeds. For example:
    # `u[t2, x+8, y+9, z+7] => uL(t2, x+8, y+9, z+7)`
    mapper = {}
    indexeds = FindSymbols('indexeds').visit(iet)
    for i in indexeds:
        try:
            mapper[i] = findexeds[i.function](i)
        except KeyError:
            pass

    # Introduce the linearized expressions
    iet = Uxreplace(mapper).visit(iet)

    # All Functions that actually require linearization in `iet`
    candidates = []

    candidates.extend(filter_ordered(i.function for i in indexeds))

    calls = FindNodes(Call).visit(iet)
    cfuncs = filter_ordered(flatten(i.functions for i in calls))
    candidates.extend(i for i in cfuncs if i.function.is_DiscreteFunction)

    # All Functions that can be linearized in `iet`
    defines = FindSymbols('defines-aliases').visit(iet)

    # Place the linearization expressions or delegate to ancestor efunc
    stmts0 = []
    stmts1 = []
    args = []
    for f in candidates:
        if f in defines:
            stmts0.extend(cache[f].stmts0)
            stmts1.extend(cache[f].stmts1)
        else:
            args.extend([e.write for e in cache[f].stmts1])
    if stmts0:
        assert len(stmts1) > 0
        stmts0 = filter_ordered(stmts0) + [BlankLine]
        stmts1 = filter_ordered(stmts1) + [BlankLine]
        body = iet.body._rebuild(body=tuple(stmts0) + tuple(stmts1) + iet.body.body)
        iet = iet._rebuild(body=body)
    else:
        assert len(stmts0) == 0

    return iet, headers, args
示例#12
0
def linearize_accesses(iet, cache, sregistry):
    """
    Turn Indexeds into FIndexeds and create the necessary access Macros.
    """
    # Find all objects amenable to linearization
    symbol_names = {i.name for i in FindSymbols('indexeds').visit(iet)}
    functions = [f for f in FindSymbols().visit(iet)
                 if ((f.is_DiscreteFunction or f.is_Array) and
                     f.ndim > 1 and
                     f.name in symbol_names)]
    functions = sorted(functions, key=lambda f: len(f.dimensions), reverse=True)

    # Find unique sizes (unique -> minimize necessary registers)
    mapper = DefaultOrderedDict(list)
    for f in functions:
        if f not in cache:
            # NOTE: the outermost dimension is unnecessary
            for d in f.dimensions[1:]:
                # TODO: same grid + same halo => same padding, however this is
                # never asserted throughout the compiler yet... maybe should do
                # it when in debug mode at `prepare_arguments` time, ie right
                # before jumping to C?
                mapper[(d, f._size_halo[d], getattr(f, 'grid', None))].append(f)

    # Build all exprs such as `x_fsz0 = u_vec->size[1]`
    imapper = DefaultOrderedDict(list)
    for (d, halo, _), v in mapper.items():
        name = sregistry.make_name(prefix='%s_fsz' % d.name)
        s = Symbol(name=name, dtype=np.int32, is_const=True)
        try:
            expr = DummyExpr(s, v[0]._C_get_field(FULL, d).size, init=True)
        except AttributeError:
            assert v[0].is_Array
            expr = DummyExpr(s, v[0].symbolic_shape[d], init=True)
        for f in v:
            imapper[f].append((d, s))
            cache[f].stmts0.append(expr)

    # Build all exprs such as `y_slc0 = y_fsz0*z_fsz0`
    built = {}
    mapper = DefaultOrderedDict(list)
    for f, v in imapper.items():
        for n, (d, _) in enumerate(v):
            expr = prod(list(zip(*v[n:]))[1])
            try:
                stmt = built[expr]
            except KeyError:
                name = sregistry.make_name(prefix='%s_slc' % d.name)
                s = Symbol(name=name, dtype=np.int32, is_const=True)
                stmt = built[expr] = DummyExpr(s, expr, init=True)
            mapper[f].append(stmt.write)
            cache[f].stmts1.append(stmt)
    mapper.update([(f, []) for f in functions if f not in mapper])

    # Build defines. For example:
    # `define uL(t, x, y, z) u[(t)*t_slice_sz + (x)*x_slice_sz + (y)*y_slice_sz + (z)]`
    headers = []
    findexeds = {}
    for f, szs in mapper.items():
        if cache[f].cbk is not None:
            # Perhaps we've already built an access macro for `f` through another efunc
            findexeds[f] = cache[f].cbk
        else:
            assert len(szs) == len(f.dimensions) - 1
            pname = sregistry.make_name(prefix='%sL' % f.name)

            expr = sum([MacroArgument(d.name)*s for d, s in zip(f.dimensions, szs)])
            expr += MacroArgument(f.dimensions[-1].name)
            expr = Indexed(IndexedData(f.name, None, f), expr)
            define = DefFunction(pname, f.dimensions)
            headers.append((ccode(define), ccode(expr)))

            cache[f].cbk = findexeds[f] = lambda i, pname=pname: FIndexed(i, pname)

    # Build "functional" Indexeds. For example:
    # `u[t2, x+8, y+9, z+7] => uL(t2, x+8, y+9, z+7)`
    mapper = {}
    for n in FindNodes(Expression).visit(iet):
        subs = {}
        for i in retrieve_indexed(n.expr):
            try:
                subs[i] = findexeds[i.function](i)
            except KeyError:
                pass
        mapper[n] = n._rebuild(expr=uxreplace(n.expr, subs))

    # Put together all of the necessary exprs for `y_fsz0`, ..., `y_slc0`, ...
    stmts0 = filter_ordered(flatten(cache[f].stmts0 for f in functions))
    if stmts0:
        stmts0.append(BlankLine)
    stmts1 = filter_ordered(flatten(cache[f].stmts1 for f in functions))
    if stmts1:
        stmts1.append(BlankLine)

    iet = Transformer(mapper).visit(iet)
    body = iet.body._rebuild(body=tuple(stmts0) + tuple(stmts1) + iet.body.body)
    iet = iet._rebuild(body=body)

    return iet, headers
示例#13
0
    def __init__(self, function, contracted_dims, accessv, n, async_degree):
        self.function = function
        self.accessv = accessv

        contraction_mapper = {}
        index_mapper = {}
        dims = list(function.dimensions)
        for d in contracted_dims:
            assert d in function.dimensions

            # Determine the buffer size along `d`
            indices = filter_ordered(i.indices[d] for i in accessv.accesses)
            slots = [i.xreplace({d: 0, d.spacing: 1}) for i in indices]
            size = max(slots) - min(slots) + 1

            if async_degree is not None:
                if async_degree < size:
                    warning("Ignoring provided asynchronous degree as it'd be "
                            "too small for the required buffer (provided %d, "
                            "but need at least %d for `%s`)"
                            % (async_degree, size, function.name))
                else:
                    size = async_degree

            # Replace `d` with a suitable CustomDimension
            bd = CustomDimension('db%d' % n, 0, size-1, size, d)
            contraction_mapper[d] = dims[dims.index(d)] = bd

            if size > 1:
                # Create the necessary SteppingDimensions for indexing
                sd = SteppingDimension(name='sb%d' % n, parent=bd)
                index_mapper.update({i: i.xreplace({d: sd}) for i in indices})
            else:
                # Special case, no need to keep a SteppingDimension around
                index_mapper.update({i: 0 for i in indices})

        self.contraction_mapper = contraction_mapper
        self.index_mapper = index_mapper

        # Track the SubDimensions used to index into `function`
        subdims_mapper = DefaultOrderedDict(set)
        for e in accessv.mapper:
            try:
                # Case 1: implicitly via SubDomains
                m = {d.root: v for d, v in e.subdomain.dimension_map.items()}
            except AttributeError:
                # Case 2: explicitly via the lower-level SubDimension API
                m = {i.root: i for i in e.free_symbols
                     if isinstance(i, Dimension) and (i.is_Sub or not i.is_Derived)}
            for d, v in m.items():
                subdims_mapper[d].add(v)
        if any(len(v) > 1 for v in subdims_mapper.values()):
            # Non-uniform SubDimensions. At this point we're going to raise
            # an exception. It's either illegal or still unsupported
            for v in subdims_mapper.values():
                for d0, d1 in combinations(v, 2):
                    if d0.overlap(d1):
                        raise InvalidOperator("Cannot apply `buffering` to `%s` as it "
                                              "is accessed over the overlapping "
                                              " SubDimensions `<%s, %s>`" %
                                              (function, d0, d1))
            self.subdims_mapper = None
            raise NotImplementedError("`buffering` does not support multiple "
                                      "non-overlapping SubDimensions yet.")
        else:
            self.subdims_mapper = {d: v.pop() for d, v in subdims_mapper.items()}

        self.buffer = Array(name='%sb' % function.name,
                            dimensions=dims,
                            dtype=function.dtype,
                            halo=function.halo,
                            space='mapped')
示例#14
0
    def __init__(self, function, contracted_dims, accessv, options, sregistry,
                 bds=None, mds=None):
        # Parse compilation options
        async_degree = options['buf-async-degree']
        space = options['buf-mem-space']
        dtype = options['buf-dtype'](function)

        self.function = function
        self.accessv = accessv

        self.contraction_mapper = {}
        self.index_mapper = defaultdict(dict)
        self.sub_iterators = defaultdict(list)
        self.subdims_mapper = DefaultOrderedDict(set)

        # Create the necessary ModuloDimensions for indexing into the buffer
        # E.g., `u[time,x] + u[time+1,x] -> `ub[sb0,x] + ub[sb1,x]`, where `sb0`
        # and `sb1` are ModuloDimensions starting at `time` and `time+1` respectively
        dims = list(function.dimensions)
        for d in contracted_dims:
            assert d in function.dimensions

            # Determine the buffer size, and therefore the span of the ModuloDimension,
            # along the contracting Dimension `d`
            indices = filter_ordered(i.indices[d] for i in accessv.accesses)
            slots = [i.subs({d: 0, d.spacing: 1}) for i in indices]
            try:
                size = max(slots) - min(slots) + 1
            except TypeError:
                # E.g., special case `slots=[-1 + time/factor, 2 + time/factor]`
                # Resort to the fast vector-based comparison machinery (rather than
                # the slower sympy.simplify)
                slots = [Vector(i) for i in slots]
                size = int((vmax(*slots) - vmin(*slots) + 1)[0])

            if async_degree is not None:
                if async_degree < size:
                    warning("Ignoring provided asynchronous degree as it'd be "
                            "too small for the required buffer (provided %d, "
                            "but need at least %d for `%s`)"
                            % (async_degree, size, function.name))
                else:
                    size = async_degree

            # Replace `d` with a suitable CustomDimension `bd`
            name = sregistry.make_name(prefix='db')
            bd = bds.setdefault((d, size), CustomDimension(name, 0, size-1, size, d))
            self.contraction_mapper[d] = dims[dims.index(d)] = bd

            # Finally create the ModuloDimensions as children of `bd`
            if size > 1:
                # Note: indices are sorted so that the semantic order (sb0, sb1, sb2)
                # follows SymPy's index ordering (time, time-1, time+1) after modulo
                # replacement, so that associativity errors are consistent. This very
                # same strategy is also applied in clusters/algorithms/Stepper
                p, _ = offset_from_centre(d, indices)
                indices = sorted(indices,
                                 key=lambda i: -np.inf if i - p == 0 else (i - p))
                for i in indices:
                    name = sregistry.make_name(prefix='sb')
                    md = mds.setdefault((bd, i), ModuloDimension(name, bd, i, size))
                    self.index_mapper[d][i] = md
                    self.sub_iterators[d.root].append(md)
            else:
                assert len(indices) == 1
                self.index_mapper[d][indices[0]] = 0

        # Track the SubDimensions used to index into `function`
        for e in accessv.mapper:
            m = {i.root: i for i in e.free_symbols
                 if isinstance(i, Dimension) and (i.is_Sub or not i.is_Derived)}
            for d, v in m.items():
                self.subdims_mapper[d].add(v)
        if any(len(v) > 1 for v in self.subdims_mapper.values()):
            # Non-uniform SubDimensions. At this point we're going to raise
            # an exception. It's either illegal or still unsupported
            for v in self.subdims_mapper.values():
                for d0, d1 in combinations(v, 2):
                    if d0.overlap(d1):
                        raise InvalidOperator("Cannot apply `buffering` to `%s` as it "
                                              "is accessed over the overlapping "
                                              " SubDimensions `<%s, %s>`" %
                                              (function, d0, d1))
            raise NotImplementedError("`buffering` does not support multiple "
                                      "non-overlapping SubDimensions yet.")
        else:
            self.subdims_mapper = {d: v.pop() for d, v in self.subdims_mapper.items()}

        # Build and sanity-check the buffer IterationIntervals
        self.itintervals_mapper = {}
        for e in accessv.mapper:
            for i in e.ispace.itintervals:
                v = self.itintervals_mapper.setdefault(i.dim, i.args)
                if v != self.itintervals_mapper[i.dim]:
                    raise NotImplementedError("Cannot apply `buffering` as the buffered "
                                              "function `%s` is accessed over multiple, "
                                              "non-compatible iteration spaces along the "
                                              "Dimension `%s`" % (function.name, i.dim))
        # Also add IterationIntervals for initialization along `x`, should `xi` be
        # the only written Dimension in the `x` hierarchy
        for d, (interval, _, _) in list(self.itintervals_mapper.items()):
            for i in d._defines:
                self.itintervals_mapper.setdefault(i, (interval.relaxed, (), Forward))

        # Finally create the actual buffer
        self.buffer = Array(name=sregistry.make_name(prefix='%sb' % function.name),
                            dimensions=dims,
                            dtype=dtype,
                            halo=function.halo,
                            space=space)
示例#15
0
class Buffer(object):

    """
    A buffer with metadata attached.

    Parameters
    ----------
    function : DiscreteFunction
        The object for which the buffer is created.
    contracted_dims : list of Dimension
        The Dimensions in `function` to be contracted, that is to be replaced
        by ModuloDimensions.
    accessv : AccessValue
        All accesses involving `function`.
    options : dict, optional
        The compilation options. See `buffering.__doc__`.
    sregistry : SymbolRegistry
        The symbol registry, to create unique names for buffers and Dimensions.
    bds : dict, optional
        All CustomDimensions created to define buffer dimensions, potentially
        reusable in the creation of this buffer. The object gets updated if new
        CustomDimensions are created.
    mds : dict, optional
        All ModuloDimensions created to index into other buffers, potentially reusable
        for indexing into this buffer. The object gets updated if new ModuloDimensions
        are created.
    """

    def __init__(self, function, contracted_dims, accessv, options, sregistry,
                 bds=None, mds=None):
        # Parse compilation options
        async_degree = options['buf-async-degree']
        space = options['buf-mem-space']
        dtype = options['buf-dtype'](function)

        self.function = function
        self.accessv = accessv

        self.contraction_mapper = {}
        self.index_mapper = defaultdict(dict)
        self.sub_iterators = defaultdict(list)
        self.subdims_mapper = DefaultOrderedDict(set)

        # Create the necessary ModuloDimensions for indexing into the buffer
        # E.g., `u[time,x] + u[time+1,x] -> `ub[sb0,x] + ub[sb1,x]`, where `sb0`
        # and `sb1` are ModuloDimensions starting at `time` and `time+1` respectively
        dims = list(function.dimensions)
        for d in contracted_dims:
            assert d in function.dimensions

            # Determine the buffer size, and therefore the span of the ModuloDimension,
            # along the contracting Dimension `d`
            indices = filter_ordered(i.indices[d] for i in accessv.accesses)
            slots = [i.subs({d: 0, d.spacing: 1}) for i in indices]
            try:
                size = max(slots) - min(slots) + 1
            except TypeError:
                # E.g., special case `slots=[-1 + time/factor, 2 + time/factor]`
                # Resort to the fast vector-based comparison machinery (rather than
                # the slower sympy.simplify)
                slots = [Vector(i) for i in slots]
                size = int((vmax(*slots) - vmin(*slots) + 1)[0])

            if async_degree is not None:
                if async_degree < size:
                    warning("Ignoring provided asynchronous degree as it'd be "
                            "too small for the required buffer (provided %d, "
                            "but need at least %d for `%s`)"
                            % (async_degree, size, function.name))
                else:
                    size = async_degree

            # Replace `d` with a suitable CustomDimension `bd`
            name = sregistry.make_name(prefix='db')
            bd = bds.setdefault((d, size), CustomDimension(name, 0, size-1, size, d))
            self.contraction_mapper[d] = dims[dims.index(d)] = bd

            # Finally create the ModuloDimensions as children of `bd`
            if size > 1:
                # Note: indices are sorted so that the semantic order (sb0, sb1, sb2)
                # follows SymPy's index ordering (time, time-1, time+1) after modulo
                # replacement, so that associativity errors are consistent. This very
                # same strategy is also applied in clusters/algorithms/Stepper
                p, _ = offset_from_centre(d, indices)
                indices = sorted(indices,
                                 key=lambda i: -np.inf if i - p == 0 else (i - p))
                for i in indices:
                    name = sregistry.make_name(prefix='sb')
                    md = mds.setdefault((bd, i), ModuloDimension(name, bd, i, size))
                    self.index_mapper[d][i] = md
                    self.sub_iterators[d.root].append(md)
            else:
                assert len(indices) == 1
                self.index_mapper[d][indices[0]] = 0

        # Track the SubDimensions used to index into `function`
        for e in accessv.mapper:
            m = {i.root: i for i in e.free_symbols
                 if isinstance(i, Dimension) and (i.is_Sub or not i.is_Derived)}
            for d, v in m.items():
                self.subdims_mapper[d].add(v)
        if any(len(v) > 1 for v in self.subdims_mapper.values()):
            # Non-uniform SubDimensions. At this point we're going to raise
            # an exception. It's either illegal or still unsupported
            for v in self.subdims_mapper.values():
                for d0, d1 in combinations(v, 2):
                    if d0.overlap(d1):
                        raise InvalidOperator("Cannot apply `buffering` to `%s` as it "
                                              "is accessed over the overlapping "
                                              " SubDimensions `<%s, %s>`" %
                                              (function, d0, d1))
            raise NotImplementedError("`buffering` does not support multiple "
                                      "non-overlapping SubDimensions yet.")
        else:
            self.subdims_mapper = {d: v.pop() for d, v in self.subdims_mapper.items()}

        # Build and sanity-check the buffer IterationIntervals
        self.itintervals_mapper = {}
        for e in accessv.mapper:
            for i in e.ispace.itintervals:
                v = self.itintervals_mapper.setdefault(i.dim, i.args)
                if v != self.itintervals_mapper[i.dim]:
                    raise NotImplementedError("Cannot apply `buffering` as the buffered "
                                              "function `%s` is accessed over multiple, "
                                              "non-compatible iteration spaces along the "
                                              "Dimension `%s`" % (function.name, i.dim))
        # Also add IterationIntervals for initialization along `x`, should `xi` be
        # the only written Dimension in the `x` hierarchy
        for d, (interval, _, _) in list(self.itintervals_mapper.items()):
            for i in d._defines:
                self.itintervals_mapper.setdefault(i, (interval.relaxed, (), Forward))

        # Finally create the actual buffer
        self.buffer = Array(name=sregistry.make_name(prefix='%sb' % function.name),
                            dimensions=dims,
                            dtype=dtype,
                            halo=function.halo,
                            space=space)

    def __repr__(self):
        return "Buffer[%s,<%s>]" % (self.buffer.name,
                                    ','.join(str(i) for i in self.contraction_mapper))

    @property
    def size(self):
        return np.prod([v.symbolic_size for v in self.contraction_mapper.values()])

    @property
    def is_read(self):
        return self.accessv.is_read

    @property
    def is_readonly(self):
        return self.is_read and self.lastwrite is None

    @property
    def firstread(self):
        return self.accessv.firstread

    @property
    def lastwrite(self):
        return self.accessv.lastwrite

    @property
    def has_uniform_subdims(self):
        return self.subdims_mapper is not None

    @cached_property
    def indexed(self):
        return self.buffer.indexed

    @cached_property
    def index_mapper_flat(self):
        ret = {}
        for mapper in self.index_mapper.values():
            ret.update(mapper)
        return ret

    @cached_property
    def writeto(self):
        """
        The `writeto` IterationSpace, that is the iteration space that must be
        iterated over in order to initialize the buffer.
        """
        intervals = []
        sub_iterators = {}
        directions = {}
        for d in self.buffer.dimensions:
            try:
                interval, si, direction = self.itintervals_mapper[d]
            except KeyError:
                # E.g., the contraction Dimension `db0`
                assert d in self.contraction_mapper.values()
                interval, si, direction = Interval(d, 0, 0), (), Forward
            intervals.append(interval)
            sub_iterators[d] = si
            directions[d] = direction

        relations = (self.buffer.dimensions,)
        intervals = IntervalGroup(intervals, relations=relations)

        return IterationSpace(intervals, sub_iterators, directions)

    @cached_property
    def written(self):
        """
        The `written` IterationSpace, that is the iteration space that must be
        iterated over in order to read all of the written buffer values.
        """
        intervals = []
        sub_iterators = {}
        directions = {}
        for dd in self.function.dimensions:
            d = dd.xreplace(self.subdims_mapper)
            try:
                interval, si, direction = self.itintervals_mapper[d]
            except KeyError:
                # E.g., d=time_sub
                assert d.is_NonlinearDerived
                d = d.root
                interval, si, direction = self.itintervals_mapper[d]
            intervals.append(interval)
            sub_iterators[d] = si + as_tuple(self.sub_iterators[d])
            directions[d] = direction

        relations = (tuple(i.dim for i in intervals),)
        intervals = IntervalGroup(intervals, relations=relations)

        return IterationSpace(intervals, sub_iterators, directions)

    @cached_property
    def lastmap(self):
        """
        A mapper from contracted Dimensions to a 2-tuple of indices representing,
        respectively, the "last" write to the buffer and the "last" read from the
        buffered Function. For example, `{time: (sb1, time+1)}`.
        """
        mapper = {}
        for d, m in self.index_mapper.items():
            try:
                func = max if self.written.directions[d.root] is Forward else min
                v = func(m)
            except TypeError:
                func = vmax if self.written.directions[d.root] is Forward else vmin
                v = func(*[Vector(i) for i in m])[0]
            mapper[d] = Map(m[v], v)

        return mapper

    @cached_property
    def initmap(self):
        """
        A mapper from contracted Dimensions to indices representing the min points
        for buffer initialization. For example, in the case of a forward-propagating
        `time` Dimension, we could have `{time: (time_m + db0) % 2, (time_m + db0)}`;
        likewise, for backwards, `{time: (time_M - 2 + db0) % 4, time_M - 2 + db0}`.
        """
        mapper = {}
        for d, bd in self.contraction_mapper.items():
            indices = list(self.index_mapper[d])

            # The buffer is initialized at `d_m(d_M) - offset`. E.g., a buffer with
            # six slots, used to replace a buffered Function accessed at `d-3`, `d`
            # and `d + 2`, will have `offset = 3`
            p, offset = offset_from_centre(d, indices)

            if self.written.directions[d.root] is Forward:
                v = p.subs(d.root, d.root.symbolic_min) - offset + bd
            else:
                v = p.subs(d.root, d.root.symbolic_max) - offset + bd

            mapper[d] = Map(v % bd.symbolic_size, v)

        return mapper
示例#16
0
def stree_schedule(clusters):
    """
    Arrange an iterable of Clusters into a ScheduleTree.
    """
    stree = ScheduleTree()

    prev = Cluster(None)
    mapper = DefaultOrderedDict(lambda: Bunch(top=None, bottom=None))

    def reuse_metadata(c0, c1, d):
        return (c0.guards.get(d) == c1.guards.get(d)
                and c0.syncs.get(d) == c1.syncs.get(d))

    def attach_metadata(cluster, d, tip):
        if d in cluster.guards:
            tip = NodeConditional(cluster.guards[d], tip)
        if d in cluster.syncs:
            tip = NodeSync(cluster.syncs[d], tip)
        return tip

    for c in clusters:
        index = 0

        # Reuse or add in any Conditionals and Syncs outside of the outermost Iteration
        if not reuse_metadata(c, prev, None):
            tip = attach_metadata(c, None, stree)
            maybe_reusable = []
        else:
            try:
                tip = mapper[prev.itintervals[index]].top.parent
            except IndexError:
                tip = stree
            maybe_reusable = prev.itintervals

        for it0, it1 in zip(c.itintervals, maybe_reusable):
            if it0 != it1:
                break
            index += 1

            d = it0.dim

            # The reused sub-trees might acquire new sub-iterators as well as
            # new properties
            mapper[it0].top.ispace = IterationSpace.union(
                mapper[it0].top.ispace, c.ispace.project([d]))
            mapper[it0].top.properties = normalize_properties(
                mapper[it0].top.properties, c.properties[it0.dim])

            # Different guards or SyncOps cannot further be nested
            if not reuse_metadata(c, prev, d):
                tip = mapper[it0].top
                tip = attach_metadata(c, d, tip)
                mapper[it0].bottom = tip
                break
            else:
                tip = mapper[it0].bottom

        # Nested sub-trees, instead, will not be used anymore
        for it in prev.itintervals[index:]:
            mapper.pop(it)

        # Add in Iterations, Conditionals, and Syncs
        for it in c.itintervals[index:]:
            d = it.dim
            tip = NodeIteration(c.ispace.project([d]), tip,
                                c.properties.get(d, ()))
            mapper[it].top = tip
            tip = attach_metadata(c, d, tip)
            mapper[it].bottom = tip

        # Add in Expressions
        exprs = []
        for conditionals, g in groupby(c.exprs, key=lambda e: e.conditionals):
            exprs = list(g)

            # Indirect ConditionalDimensions induce expression-level guards
            if conditionals:
                guard = And(*conditionals.values(), evaluate=False)
                parent = NodeConditional(guard, tip)
            else:
                parent = tip

            NodeExprs(exprs, c.ispace, c.dspace, c.ops, c.traffic, parent)

        # Prepare for next iteration
        prev = c

    return stree