Пример #1
0
    def _schedule_expressions(self, clusters):
        """Wrap :class:`Expression` objects, already grouped in :class:`Cluster`
        objects, within nested :class:`Iteration` objects (representing loops),
        according to dimensions and stencils.

        :param clusters: The clusters to be scheduled. The collection is
                         traversed twice (once to establish the global loop
                         ordering, once to build the tree), so it must be
                         re-iterable.
        :returns: A :class:`List` whose body contains the scheduled nodes, in
                  the order in which the clusters were visited.
        """

        # Topologically sort Iterations
        ordering = partial_order([i.stencil.dimensions for i in clusters])
        # The parent of a buffered dimension must sit right before the
        # dimension itself. The insertion point is looked up in the live
        # ordering (rather than taken from the position in the snapshot being
        # iterated), because every insertion shifts the subsequent entries
        for d in list(ordering):
            if d.is_Buffered:
                ordering.insert(ordering.index(d), d.parent)

        # Build the Iteration/Expression tree
        processed = []
        schedule = OrderedDict()
        atomics = ()
        for i in clusters:
            # Build the Expression objects to be inserted within an Iteration tree
            expressions = [Expression(v, np.int32 if i.trace.is_index(k) else self.dtype)
                           for k, v in i.trace.items()]

            if not i.stencil.empty:
                root = None
                entries = i.stencil.entries

                # Reorder based on the globally-established loop ordering
                entries = sorted(entries, key=lambda e: ordering.index(e.dim))

                # Can I reuse any of the previously scheduled Iterations ?
                # Walk the common prefix between the entries required by this
                # cluster and those already scheduled; stop at the first
                # mismatch or at a dimension the previous cluster marked atomic
                index = 0
                for j0, j1 in zip(entries, list(schedule)):
                    if j0 != j1 or j0.dim in atomics:
                        break
                    root = schedule[j1]
                    index += 1
                needed = entries[index:]

                # Build and insert the required Iterations
                iters = [Iteration([], j.dim, j.dim.size, offsets=j.ofs) for j in needed]
                body, tree = compose_nodes(iters + [expressions], retrieve=True)
                scheduling = OrderedDict(zip(needed, tree))
                if root is None:
                    # Nothing to reuse: start a brand new loop nest
                    processed.append(body)
                    schedule = scheduling
                else:
                    # Append the new nest to the body of the innermost
                    # reusable Iteration, rebuilding the enclosing tree
                    nodes = list(root.nodes) + [body]
                    mapper = {root: root._rebuild(nodes, **root.args_frozen)}
                    transformer = Transformer(mapper)
                    processed = list(transformer.visit(processed))
                    schedule = OrderedDict(list(schedule.items())[:index] +
                                           list(scheduling.items()))
                    # The schedule must point to the rebuilt Iterations, not
                    # to the objects they replaced
                    for k, v in list(schedule.items()):
                        schedule[k] = transformer.rebuilt.get(v, v)
            else:
                # No Iterations are needed
                processed.extend(expressions)

            # Track dimensions that cannot be fused at next stage
            atomics = i.atomics

        return List(body=processed)
Пример #2
0
def copy_arrays(mapper, reverse=False):
    """
    Build the Iteration/Expression trees that copy data between the pairs
    found in ``mapper``.

    For each ``(k, v)`` in ``mapper`` the generated statement is ``k = v``,
    or ``v = k`` when ``reverse`` is True. The loops are built from the
    shape of ``k`` and the indices of ``k.function``.

    An empty (or None) ``mapper`` yields the empty tuple; otherwise the
    result is whatever :class:`MergeOuterIterations` returns when visiting
    the list of trees.
    """
    if not mapper:
        return ()

    trees = []
    for k, v in mapper.items():
        indices = k.function.indices

        # One (empty) loop per dimension, outermost first
        loops = [Iteration([], dimension=dim, limits=extent)
                 for extent, dim in zip(k.shape, indices)]

        # Direction of the copy
        if reverse:
            lhs, rhs = v, k
        else:
            lhs, rhs = k, v
        copy = Expression(Eq(lhs[indices], rhs[indices]), dtype=k.function.dtype)

        trees.append(compose_nodes(loops + [copy]))

    # Maybe some Iterations are mergeable
    return MergeOuterIterations().visit(trees)
Пример #3
0
 def merge(self, iter1, iter2):
     """Fuse two :class:`Iteration` objects into a new one.

     The new Iteration runs the nodes of ``iter1`` followed by those of
     ``iter2``; dimension, limits and offsets are all taken from ``iter1``.
     """
     fused_body = iter1.nodes + iter2.nodes
     loop_args = dict(dimension=iter1.dim,
                      limits=iter1.limits,
                      offsets=iter1.offsets)
     return Iteration(fused_body, **loop_args)
Пример #4
0
def iters(dims):
    """
    Return a list of eight one-argument callables, each wrapping its argument
    in an :class:`Iteration` with fixed limits ``(0, <extent>, 1)``.

    The first six iterate over ``dims['i']``, ``dims['j']``, ``dims['k']``,
    ``dims['s']``, ``dims['q']`` and ``dims['l']``; the last two over the
    module-level names ``x`` and ``y``. Dimensions are looked up only when
    a callable is invoked.
    """
    def over(name, extent):
        # The lookup in ``dims`` is deferred to call time on purpose
        def build(ex):
            return Iteration(ex, dims[name], (0, extent, 1))
        return build

    keyed = (('i', 3), ('j', 5), ('k', 7), ('s', 4), ('q', 4), ('l', 6))
    builders = [over(name, extent) for name, extent in keyed]

    # ``x`` and ``y`` are resolved in the enclosing module at call time
    builders.append(lambda ex: Iteration(ex, x, (0, 5, 1)))
    builders.append(lambda ex: Iteration(ex, y, (0, 7, 1)))
    return builders
Пример #5
0
    def _schedule_expressions(self, clusters, ordering):
        """Wrap :class:`Expression` objects, already grouped in :class:`Cluster`
        objects, within nested :class:`Iteration` objects (representing loops),
        according to dimensions and stencils."""

        processed = []
        schedule = OrderedDict()
        for i in clusters:
            # Build the Expression objects to be inserted within an Iteration tree
            expressions = [
                Expression(v, np.int32 if i.trace.is_index(k) else self.dtype)
                for k, v in i.trace.items()
            ]

            if not i.stencil.empty:
                root = None
                entries = i.stencil.entries

                # Can I reuse any of the previously scheduled Iterations ?
                index = 0
                for j0, j1 in zip(entries, list(schedule)):
                    if j0 != j1:
                        break
                    root = schedule[j1]
                    index += 1
                needed = entries[index:]

                # Build and insert the required Iterations
                iters = [
                    Iteration([], j.dim, j.dim.size, offsets=j.ofs)
                    for j in needed
                ]
                body, tree = compose_nodes(iters + [expressions],
                                           retrieve=True)
                scheduling = OrderedDict(zip(needed, tree))
                if root is None:
                    processed.append(body)
                    schedule = scheduling
                else:
                    nodes = list(root.nodes) + [body]
                    mapper = {root: root._rebuild(nodes, **root.args_frozen)}
                    transformer = Transformer(mapper)
                    processed = list(transformer.visit(processed))
                    schedule = OrderedDict(
                        list(schedule.items())[:index] +
                        list(scheduling.items()))
                    for k, v in list(schedule.items()):
                        schedule[k] = transformer.rebuilt.get(v, v)
            else:
                # No Iterations are needed
                processed.extend(expressions)

        return processed
Пример #6
0
    def unfold(self):
        """
        Expand ``self`` into plain :class:`Iteration` objects.

        Return a tuple with one Iteration per fold in ``self.folds`` (in
        order), followed by the root Iteration. Each fold Iteration shares the
        root's arguments except for its nodes and for its limits, which are
        shifted by the fold's offsets.
        """
        kwargs = self.args
        kwargs.pop('folds')

        # The root Iteration takes all of the remaining arguments as they are
        root = Iteration(**kwargs)

        # The folds do not inherit body and offsets, and get their own limits
        for name in ('nodes', 'offsets'):
            kwargs.pop(name)
        try:
            start, end, incr = kwargs.pop('limits')
        except TypeError:
            # The stored 'limits' cannot be unpacked; use ``self.limits``
            start, end, incr = self.limits

        unfolded = []
        for ofs, nodes in self.folds:
            shifted = [start + ofs[0], end + ofs[1], incr]
            unfolded.append(Iteration(nodes, limits=shifted, **kwargs))

        return tuple(unfolded) + as_tuple(root)
Пример #7
0
    def _insert_declarations(self, dle_state, parameters):
        """Add to the Operator's body the array and variable declarations
        that are required to generate a legal C file.

        Scalars are declared inline; objects flagged ``_mem_external`` are
        skipped; objects flagged ``_mem_stack`` are declared within an
        Iteration; everything else is handed to the allocator as a heap
        object. ``parameters`` is accepted but not used here.

        Returns the pair ``(nodes, elemental_functions)`` after the
        transformation."""

        nodes = dle_state.nodes

        # Collect the scopes, looking through function calls
        scopes = []
        for k, v in FindScopes().visit(nodes).items():
            if not k.is_FunCall:
                scopes.append((k, v))
                continue
            function = dle_state.func_table[k.name]
            inner = FindScopes().visit(function, queue=list(v))
            scopes.extend(inner.items())

        # Work out what must be declared, and where
        allocator = Allocator()
        mapper = OrderedDict()
        for k, v in scopes:
            if k.is_scalar:
                # Inline declaration
                mapper[k] = LocalExpression(**k.args)
                continue

            if k.output_function._mem_external:
                # Nothing to do, variable passed as kernel argument
                continue

            if k.output_function._mem_stack:
                # On the stack, as established by the DLE

                def unrelated(i):
                    return i.dim not in k.output_function.indices

                site = filter_iterations(v, key=unrelated, stop='consecutive')
                allocator.push_stack(site[-1], k.output_function)
                continue

            # On the heap, as a tensor that must be globally accessible
            allocator.push_heap(k.output_function)

        # Stack declarations are placed at the top of the chosen Iterations
        for k, v in allocator.onstack:
            allocs = as_tuple([Element(i) for i in v])
            mapper[k] = Iteration(allocs + k.nodes, **k.args_frozen)
        nodes = Transformer(mapper).visit(nodes)
        elemental_functions = Transformer(mapper).visit(
            dle_state.elemental_functions)

        # Heap declarations (if any) wrap the whole body
        if allocator.onheap:
            decls, allocs, frees = zip(*allocator.onheap)
            nodes = List(header=decls + allocs, body=nodes, footer=frees)

        return nodes, elemental_functions
Пример #8
0
def first_touch(array):
    """Zero-initialize ``array`` (a Devito type) through the low-level
    Propagator API, so that the data is first touched by the same kind of
    loop nest that will access it later.

    Three cases are distinguished: :class:`TimeData` (leading dimension
    treated as time), :class:`PointData` (initialization performed by an
    explicit Iteration placed in the time loop), anything else (a single
    time step over the whole shape).
    """
    from devito.propagator import Propagator
    from devito.interfaces import TimeData
    from devito.nodes import Iteration
    from devito.pointdata import PointData

    stencils = [Eq(array.indexed[array.indices], 0)]
    time_loop = []
    time_dim = t

    if isinstance(array, TimeData):
        full_shape = array.shape
        time_steps = full_shape[0]
        shape = full_shape[1:]
        space_dims = array.indices[1:]
    elif isinstance(array, PointData):
        # The initialization moves from the stencils into the time loop
        time_loop = [
            Iteration(stencils,
                      dimension=array.indices[1],
                      limits=array.shape[1])
        ]
        stencils = []
        time_steps = array.shape[0]
        time_dim = array.indices[0]
        shape = []
        space_dims = []
    else:
        shape = array.shape
        time_steps = 1
        space_dims = array.indices

    settings = dict(name="init",
                    nt=time_steps,
                    shape=shape,
                    stencils=stencils,
                    space_dims=space_dims,
                    time_dim=time_dim)
    prop = Propagator(**settings)
    prop.add_devito_param(array)
    prop.save_vars[array.name] = True
    prop.time_loop_stencils_a = time_loop
    prop.run([array.data])
Пример #9
0
    def _loop_fission(self, state, **kwargs):
        """
        Apply loop fission to innermost :class:`Iteration` objects. This pass
        is not applied if the number of statements in an Iteration's body is
        lower than ``self.thresholds['fission'].``
        """

        processed = []
        for node in state.nodes:
            mapper = {}
            for tree in retrieve_iteration_tree(node):
                if len(tree) <= 1:
                    # Heuristically avoided
                    continue

                candidate = tree[-1]
                expressions = [e for e in candidate.nodes if e.is_Expression]

                if len(expressions) < self.thresholds['max_fission']:
                    # Heuristically avoided
                    continue
                if len(expressions) != len(candidate.nodes):
                    # Dangerous for correctness
                    continue

                functions = list(
                    set.union(*[set(e.functions) for e in expressions]))
                wrapped = [e.expr for e in expressions]

                if not functions or not wrapped:
                    # Heuristically avoided
                    continue

                # Promote temporaries from scalar to tensors
                handle = functions[0]
                dim = handle.indices[-1]
                size = handle.shape[-1]
                if any(dim != i.indices[-1] for i in functions):
                    # Dangerous for correctness
                    continue

                wrapped = promote_scalar_expressions(wrapped, (size, ),
                                                     (dim, ), True)

                assert len(wrapped) == len(expressions)
                rebuilt = [
                    Expression(s, e.dtype)
                    for s, e in zip(wrapped, expressions)
                ]

                # Group statements
                # TODO: Need a heuristic here to maximize reuse
                args_frozen = candidate.args_frozen
                properties = as_tuple(
                    args_frozen['properties']) + (ELEMENTAL, )
                args_frozen['properties'] = properties
                n = self.thresholds['min_fission']
                fissioned = [
                    Iteration(g, **args_frozen) for g in grouper(rebuilt, n)
                ]

                mapper[candidate] = List(body=fissioned)

            processed.append(Transformer(mapper).visit(node))

        return {'nodes': processed}
Пример #10
0
    def _loop_blocking(self, state, **kwargs):
        """
        Apply loop blocking to :class:`Iteration` trees.

        Blocking is applied to parallel iteration trees. Heuristically, innermost
        dimensions are not blocked to maximize the trip count of the SIMD loops.

        Different heuristics may be specified by passing the keywords ``blockshape``
        and ``blockinner`` to the DLE. The former, a dictionary, is used to indicate
        a specific block size for each blocked dimension. For example, for the
        :class:`Iteration` tree: ::

            for i
              for j
                for k
                  ...

        one may provide ``blockshape = {i: 4, j: 7}``, in which case the
        two outer loops will be blocked, and the resulting 2-dimensional block
        will have size 4x7. The latter may be set to True to also block innermost
        parallel :class:`Iteration` objects.

        A third keyword, ``blockalways``, disables the heuristic that skips
        trees whose outermost Iteration is not sequential.

        All three keywords are read from ``self.params``; ``kwargs`` is not
        consulted by this method.

        :param state: Object exposing the Iteration/Expression trees to be
                      transformed as ``state.nodes``.
        :returns: A dict with the transformed trees under ``'nodes'``. If at
                  least one Iteration was blocked, the dict also carries
                  ``'arguments'`` (one :class:`BlockingArg` per blocked
                  Iteration) and ``'flags'`` (the string ``'blocking'``).
        """
        exclude_innermost = not self.params.get('blockinner', False)
        ignore_heuristic = self.params.get('blockalways', False)

        # Maps each blocked Iteration to the Dimension created for its blocks;
        # shared across all trees so that it can be inspected after the loop
        blocked = OrderedDict()
        processed = []
        for node in state.nodes:
            # Make sure loop blocking will span as many Iterations as possible
            fold = fold_blockable_tree(node, exclude_innermost)

            mapper = {}
            for tree in retrieve_iteration_tree(fold):
                # Is the Iteration tree blockable ?
                iterations = [i for i in tree if i.is_Parallel]
                if exclude_innermost:
                    iterations = [
                        i for i in iterations if not i.is_Vectorizable
                    ]
                # At least two blockable Iterations are required
                if len(iterations) <= 1:
                    continue
                root = iterations[0]
                if not IsPerfectIteration().visit(root):
                    # Illegal/unsupported
                    continue
                if not tree[0].is_Sequential and not ignore_heuristic:
                    # Heuristic: avoid polluting the generated code with blocked
                    # nests (thus increasing JIT compilation time and affecting
                    # readability) if the blockable tree isn't embedded in a
                    # sequential loop (e.g., a timestepping loop)
                    continue

                # Decorate intra-block iterations with an IterationProperty
                # (the tag is derived from the number of trees already blocked
                # within this node)
                TAG = tagger(len(mapper))

                # Build all necessary Iteration objects, individually. These will
                # subsequently be composed to implement loop blocking.
                inter_blocks = []
                intra_blocks = []
                remainders = []
                for i in iterations:
                    # Build Iteration over blocks
                    # (a new "<name>_block" Dimension is registered unless one
                    # is already recorded for this Iteration)
                    dim = blocked.setdefault(
                        i, Dimension("%s_block" % i.dim.name))
                    block_size = dim.symbolic_size
                    iter_size = i.dim.size or i.dim.symbolic_size
                    start = i.limits[0] - i.offsets[0]
                    finish = iter_size - i.offsets[1]
                    # Extent of the iteration space once both offsets are
                    # accounted for
                    innersize = iter_size - (-i.offsets[0] + i.offsets[1])
                    # Pull the upper bound back by whatever does not fill a
                    # whole block; that part is left to the remainder loops
                    finish = finish - (innersize % block_size)
                    inter_block = Iteration([],
                                            dim, [start, finish, block_size],
                                            properties=PARALLEL)
                    inter_blocks.append(inter_block)

                    # Build Iteration within a block
                    # (it spans [block start, block start + block size) with
                    # unit increment)
                    start = inter_block.dim
                    finish = start + block_size
                    intra_block = i._rebuild([],
                                             limits=[start, finish, 1],
                                             offsets=None,
                                             properties=i.properties +
                                             (TAG, ELEMENTAL))
                    intra_blocks.append(intra_block)

                    # Build unitary-increment Iteration over the 'leftover' region.
                    # This will be used for remainder loops, executed when any
                    # dimension size is not a multiple of the block size.
                    start = inter_block.limits[1]
                    finish = iter_size - i.offsets[1]
                    remainder = i._rebuild([],
                                           limits=[start, finish, 1],
                                           offsets=None)
                    remainders.append(remainder)

                # Build blocked Iteration nest
                # (all inter-block loops first, then all intra-block loops,
                # then the body of the innermost blocked Iteration)
                blocked_tree = compose_nodes(inter_blocks + intra_blocks +
                                             [iterations[-1].nodes])

                # Build remainder Iterations
                # (one nest for each non-empty combination of dimensions that
                # run over their leftover region)
                remainder_trees = []
                for n in range(len(iterations)):
                    for c in combinations([i.dim for i in iterations], n + 1):
                        # First all inter-block Interations
                        # (only for the dimensions that are NOT in the
                        # leftover region in this combination)
                        nodes = [
                            b._rebuild(properties=b.properties + (REMAINDER, ))
                            for b, r in zip(inter_blocks, remainders)
                            if r.dim not in c
                        ]
                        # Then intra-block or remainder, for each dim (in order)
                        properties = (REMAINDER, TAG, ELEMENTAL)
                        for b, r in zip(intra_blocks, remainders):
                            handle = r if b.dim in c else b
                            nodes.append(
                                handle._rebuild(properties=properties))
                        nodes.extend([iterations[-1].nodes])
                        remainder_trees.append(compose_nodes(nodes))

                # Will replace with blocked loop tree
                mapper[root] = List(body=[blocked_tree] + remainder_trees)

            rebuilt = Transformer(mapper).visit(fold)

            # Finish unrolling any previously folded Iterations
            processed.append(unfold_blocked_tree(rebuilt))

        # All blocked dimensions
        if not blocked:
            return {'nodes': processed}

        # Determine the block shape
        blockshape = self.params.get('blockshape')
        if not blockshape:
            # Use trivial heuristic for a suitable blockshape
            def heuristic(dim_size):
                ths = 8  # FIXME: This really needs to be improved
                return ths if dim_size > ths else 1

            # NOTE: the values stored here are the ``heuristic`` callable
            # itself, not numbers; presumably the consumer of BlockingArg
            # invokes it with the actual dimension size -- confirm
            blockshape = {k: heuristic for k in blocked.keys()}
        else:
            try:
                nitems, nrequired = len(blockshape), len(blocked)
                # Entries are matched to the blocked Iterations by position;
                # zip() silently truncates to the shorter of the two
                blockshape = {k: v for k, v in zip(blocked, blockshape)}
                if nitems > nrequired:
                    dle_warning("Provided 'blockshape' has more entries than "
                                "blocked loops; dropping entries ...")
                if nitems < nrequired:
                    dle_warning("Provided 'blockshape' has fewer entries than "
                                "blocked loops; dropping dimensions ...")
            except TypeError:
                # ``blockshape`` does not support len() or iteration (e.g., it
                # is a single number): apply it to the first blocked Iteration
                blockshape = {list(blocked)[0]: blockshape}
            # Any blocked Iteration left without a block size maps to None
            blockshape.update(
                {k: None
                 for k in blocked.keys() if k not in blockshape})

        # Track any additional arguments required to execute /state.nodes/
        arguments = [
            BlockingArg(v, k, blockshape[k]) for k, v in blocked.items()
        ]

        return {
            'nodes': processed,
            'arguments': arguments,
            'flags': 'blocking'
        }
Пример #11
0
    def _analyze(self, state):
        """
        Analyze the Iteration/Expression trees in ``state.nodes`` to detect
        information useful to the subsequent DLE passes.

        In particular, the presence of fully-parallel or "outermost-sequential
        inner-parallel" (OSIP) :class:`Iteration` trees is tracked. In an OSIP
        :class:`Iteration` tree, outermost :class:`Iteration` objects represent
        an inherently sequential dimension, whereas all inner :class:`Iteration`
        objects represent parallelizable dimensions.

        Only the deepest trees found in ``state.nodes`` are analyzed. The
        discovered properties are attached, as the strings ``'sequential'``,
        ``'parallel'`` and ``'vector-dim'``, to rebuilt :class:`Iteration`
        objects.

        :param state: Object exposing the trees to analyze as ``state.nodes``.
        :returns: A dict with the decorated trees under the key ``'nodes'``.
        """

        nodes = state.nodes

        sections = FindSections().visit(nodes)
        trees = sections.keys()
        # NOTE: max() raises ValueError on an empty sequence, so at least one
        # section is expected to be found here
        candidate = max(trees, key=lambda i: len(i))
        # Keep all of the trees that are as deep as the deepest one
        candidates = [i for i in trees if len(i) == len(candidate)]

        # The analysis below may return "false positives" (ie, absence of fully-
        # parallel or OSIP trees when this is actually false), but this should
        # never be the case in practice, given the targeted stencil codes.
        mapper = OrderedDict()
        for tree in candidates:
            exprs = [e.expr for e in sections[tree]]

            # "Prefetch" objects to speed up the analysis
            terms = {e: tuple(terminals(e.rhs)) for e in exprs}
            # NOTE(review): the filter tests ``e.is_Symbol`` (the whole
            # expression) rather than ``e.lhs.is_Symbol``; the latter is what
            # the OSIP loop further down checks -- confirm which is intended
            writes = {e.lhs for e in exprs if not e.is_Symbol}

            # Does the Iteration index only appear in the outermost dimension ?
            has_parallel_dimension = True
            for k, v in terms.items():
                for i in writes:
                    # Reads, in the right-hand side of ``k``, of the object
                    # written through ``i``
                    maybe_dependencies = [
                        j for j in v
                        if as_symbol(i) == as_symbol(j) and not j.is_Symbol
                    ]
                    for j in maybe_dependencies:
                        # The outermost index of the write must not show up in
                        # any of the inner indices of the read
                        handle = flatten(k.atoms() for k in j.indices[1:])
                        has_parallel_dimension &= not (i.indices[0] in handle)
            if not has_parallel_dimension:
                continue

            # Is the Iteration tree fully-parallel or OSIP?
            is_OSIP = False
            for e1 in exprs:
                lhs = e1.lhs
                if lhs.is_Symbol:
                    continue
                for e2 in exprs:
                    handle = [
                        i for i in terms[e2] if as_symbol(i) == as_symbol(lhs)
                    ]
                    # A read whose outermost index differs from that of the
                    # write makes the outermost dimension sequential
                    if any(lhs.indices[0] != i.indices[0] for i in handle):
                        is_OSIP = True
                        # Only the inner loop is left; the outer one goes on
                        break

            # Track the discovered properties
            if is_OSIP:
                mapper.setdefault(tree[0], []).append('sequential')
            # ``is_OSIP`` doubles as slice start (False -> 0, True -> 1), so
            # the outermost Iteration is skipped when it is sequential
            for i in tree[is_OSIP:-1]:
                mapper.setdefault(i, []).append('parallel')
            mapper.setdefault(tree[-1], []).extend(['parallel', 'vector-dim'])

        # Introduce the discovered properties in the Iteration/Expression tree
        for k, v in list(mapper.items()):
            args = k.args
            # 'sequential' has obviously precedence over 'parallel'
            properties = ('sequential', ) if 'sequential' in v else tuple(v)
            # New properties are appended to those the Iteration already had
            properties = as_tuple(args.pop('properties')) + properties
            mapper[k] = Iteration(properties=properties, **args)
        nodes = NestedTransformer(mapper).visit(nodes)

        return {'nodes': nodes}
Пример #12
0
    def _loop_blocking(self, state, **kwargs):
        """
        Apply loop blocking to :class:`Iteration` trees.

        By default, the blocked :class:`Iteration` objects and the block size are
        determined heuristically. All parallel Iterations of a tree are blocked,
        except:

            * Those that are vectorizable (eg, to retain SIMD vectorization),
              unless the key ``'blockinner'`` is present in ``self.params``;
            * Those rooted in an imperfect loop nest.

        The caller may take over the block size heuristic through
        ``self.params['blockshape']``, which provides the block size of each
        blocked dimension. For example, for the :class:`Iteration` tree
        below: ::

            for i
              for j
                for k
                  ...

        one may provide ``{i: 4, j: 7}``, in which case the two outer loops
        would be blocked, and the resulting 2-dimensional block would be of
        size 4x7.

        NOTE(review): ``kwargs`` is not consulted by this method; all options
        are read from ``self.params``.

        :param state: Object exposing the Iteration/Expression trees to be
                      transformed as ``state.nodes``.
        :returns: A dict with the transformed trees under ``'nodes'``. If at
                  least one Iteration was blocked, the dict also carries
                  ``'arguments'`` (one :class:`BlockingArg` per blocked
                  Iteration) and ``'flags'`` (the string ``'blocking'``).
        """
        # For each blocked Iteration: the unit-stride loop over the blocked
        # range ('main') and the one over what is left ('leftover')
        Region = namedtuple('Region', 'main leftover')

        # Maps each blocked Iteration to the Dimension created for its blocks
        blocked = OrderedDict()
        processed = []
        for node in state.nodes:
            mapper = {}
            for tree in retrieve_iteration_tree(node):
                # Is the Iteration tree blockable ?
                iterations = [i for i in tree if i.is_Parallel]
                # What matters is the presence of the key, not its value
                if 'blockinner' not in self.params:
                    iterations = [
                        i for i in iterations if not i.is_Vectorizable
                    ]
                if not iterations:
                    continue
                root = iterations[0]
                if not IsPerfectIteration().visit(root):
                    continue

                # Construct the blocked loop nest, as well as all necessary
                # remainder loops
                regions = OrderedDict()
                blocked_iterations = []
                for i in iterations:
                    # Build Iteration over blocks
                    # (a new "<name>_block" Dimension is registered unless one
                    # is already recorded for this Iteration)
                    dim = blocked.setdefault(
                        i, Dimension("%s_block" % i.dim.name))
                    block_size = dim.symbolic_size
                    iter_size = i.dim.size or i.dim.symbolic_size
                    start = i.limits[0] - i.offsets[0]
                    finish = iter_size - i.offsets[1]
                    # Pull the upper bound back so that only whole blocks are
                    # covered by the blocked nest.
                    # NOTE(review): the extent used for the modulo is
                    # ``finish - i.offsets[1]``, which does not involve
                    # ``i.offsets[0]``; presumably this assumes symmetric
                    # offsets -- confirm
                    finish = finish - ((finish - i.offsets[1]) % block_size)
                    inter_block = Iteration([],
                                            dim, [start, finish, block_size],
                                            properties=as_tuple('parallel'))

                    # Build Iteration within a block
                    # (it spans [block start, block start + block size) with
                    # unit increment)
                    start = inter_block.dim
                    finish = start + block_size
                    properties = 'vector-dim' if i.is_Vectorizable else None
                    intra_block = Iteration([],
                                            i.dim, [start, finish, 1],
                                            i.index,
                                            properties=as_tuple(properties))

                    blocked_iterations.append((inter_block, intra_block))

                    # Build unitary-increment Iteration over the 'main' region
                    # (the one blocked); necessary to generate code iterating over
                    # non-blocked ("remainder") iterations.
                    start = inter_block.limits[0]
                    finish = inter_block.limits[1]
                    main = Iteration([],
                                     i.dim, [start, finish, 1],
                                     i.index,
                                     properties=i.properties)

                    # Build unitary-increment Iteration over the 'leftover' region:
                    # again as above, this may be necessary when the dimension size
                    # is not a multiple of the block size.
                    start = inter_block.limits[1]
                    finish = iter_size - i.offsets[1]
                    leftover = Iteration([],
                                         i.dim, [start, finish, 1],
                                         i.index,
                                         properties=i.properties)

                    regions[i] = Region(main, leftover)

                # Transpose the (inter, intra) pairs so that, presumably, all
                # inter-block loops come before all intra-block loops (this
                # relies on ``flatten`` preserving the order -- confirm)
                blocked_tree = list(flatten(zip(*blocked_iterations)))
                blocked_tree = compose_nodes(blocked_tree +
                                             [iterations[-1].nodes])

                # Build remainder loops
                # (one nest for each non-empty combination of Iterations that
                # run over their leftover region; the others run over their
                # main region)
                remainder_tree = []
                for n in range(len(iterations)):
                    for i in combinations(iterations, n + 1):
                        nodes = [
                            v.leftover if k in i else v.main
                            for k, v in regions.items()
                        ]
                        nodes += [iterations[-1].nodes]
                        remainder_tree.append(compose_nodes(nodes))

                # Will replace with blocked loop tree
                mapper[root] = List(body=[blocked_tree] + remainder_tree)

            rebuilt = Transformer(mapper).visit(node)

            processed.append(rebuilt)

        # All blocked dimensions
        if not blocked:
            return {'nodes': processed}

        # Determine the block shape
        blockshape = self.params.get('blockshape')
        if not blockshape:
            # Use trivial heuristic for a suitable blockshape
            def heuristic(dim_size):
                ths = 8  # FIXME: This really needs to be improved
                return ths if dim_size > ths else 1

            # NOTE: the values stored here are the ``heuristic`` callable
            # itself, not numbers; presumably the consumer of BlockingArg
            # invokes it with the actual dimension size -- confirm
            blockshape = {k: heuristic for k in blocked.keys()}
        else:
            try:
                nitems, nrequired = len(blockshape), len(blocked)
                # Entries are matched to the blocked Iterations by position;
                # zip() silently truncates to the shorter of the two
                blockshape = {k: v for k, v in zip(blocked, blockshape)}
                if nitems > nrequired:
                    dle_warning("Provided 'blockshape' has more entries than "
                                "blocked loops; dropping entries ...")
                if nitems < nrequired:
                    dle_warning("Provided 'blockshape' has fewer entries than "
                                "blocked loops; dropping dimensions ...")
            except TypeError:
                # ``blockshape`` does not support len() or iteration (e.g., it
                # is a single number): apply it to the first blocked Iteration
                blockshape = {list(blocked)[0]: blockshape}
            # Any blocked Iteration left without a block size maps to None
            blockshape.update(
                {k: None
                 for k in blocked.keys() if k not in blockshape})

        # Track any additional arguments required to execute /state.nodes/
        arguments = [
            BlockingArg(v, k, blockshape[k]) for k, v in blocked.items()
        ]

        return {
            'nodes': processed,
            'arguments': arguments,
            'flags': 'blocking'
        }
Пример #13
0
    def _analyze(self, state):
        """
        Analyze the Iteration/Expression trees in ``state.nodes`` to detect
        information useful to the subsequent DLE passes.

        In particular, fully-parallel or "outermost-sequential inner-parallel"
        (OSIP) :class:`Iteration` trees are searched and tracked. In an OSIP
        :class:`Iteration` tree, the outermost :class:`Iteration` represents
        a sequential dimension, whereas all inner :class:`Iteration` objects
        represent parallel dimensions.

        The discovered properties (``PARALLEL``, ``SEQUENTIAL``, ``VECTOR``)
        are attached to rebuilt :class:`Iteration` objects.

        :param state: Object exposing the trees to analyze as ``state.nodes``.
        :returns: A dict with the decorated trees under the key ``'nodes'``.
        """
        nodes = state.nodes
        sections = FindSections().visit(nodes)

        # The analysis below may return "false positives" (ie, absence of fully-
        # parallel or OSIP trees when this is actually false), but this should
        # never be the case in practice, given the targeted stencil codes.
        mapper = OrderedDict()
        for tree, nexprs in sections.items():
            exprs = [e.expr for e in nexprs]

            # "Prefetch" objects to speed up the analysis
            terms = {e: tuple(retrieve_terminals(e.rhs)) for e in exprs}

            # Determine whether the Iteration tree ...
            # (each flag starts out True and can only be cleared; with no
            # matching read of a written object, all of them stay True)
            is_FP = True  # ... is fully parallel (FP)
            is_OP = True  # ... has an outermost parallel dimension (OP)
            is_OSIP = True  # ... is of type OSIP
            is_US = True  # ... has a unit-strided innermost dimension (US)
            # Compare each non-scalar write against every read of the same
            # object appearing in the right-hand side of any expression
            for lhs in [e.lhs for e in exprs if not e.lhs.is_Symbol]:
                for e in exprs:
                    for i in [j for j in terms[e] if as_symbol(j) == as_symbol(lhs)]:
                        # FP: reads and writes use exactly the same indices
                        is_FP &= lhs.indices == i.indices

                        # OP: same outermost index, which in addition does
                        # not appear in any inner index of the read
                        is_OP &= lhs.indices[0] == i.indices[0] and\
                            all(lhs.indices[0].free_symbols.isdisjoint(j.free_symbols)
                                for j in i.indices[1:])  # not A[x,y] = A[x,x+1]

                        # US: same innermost index
                        is_US &= lhs.indices[-1] == i.indices[-1]

                        # OSIP: both functions share the outermost dimension,
                        # and either the outermost indices differ, or there
                        # is a single index, or the second indices coincide
                        lhs_function, i_function = lhs.base.function, i.base.function
                        is_OSIP &= lhs_function.indices[0] == i_function.indices[0] and\
                            (lhs.indices[0] != i.indices[0] or len(lhs.indices) == 1 or
                             lhs.indices[1] == i.indices[1])

            # Build a node->property mapper
            # (the first flag that holds wins: FP, then OP, then OSIP)
            if is_FP:
                for i in tree:
                    mapper.setdefault(i, []).append(PARALLEL)
            elif is_OP:
                mapper.setdefault(tree[0], []).append(PARALLEL)
            elif is_OSIP:
                mapper.setdefault(tree[0], []).append(SEQUENTIAL)
                for i in tree[1:]:
                    mapper.setdefault(i, []).append(PARALLEL)
            if IsPerfectIteration().visit(tree[-1]) and (is_FP or is_OSIP or is_US):
                # Vectorizable
                if len(tree) > 1 and SEQUENTIAL not in mapper.get(tree[-2], []):
                    # Heuristic: there's at least an outer parallel Iteration
                    mapper.setdefault(tree[-1], []).append(VECTOR)

        # Store the discovered properties in the Iteration/Expression tree
        for k, v in list(mapper.items()):
            args = k.args
            # SEQUENTIAL kills PARALLEL
            # (an Iteration that is not linear is marked SEQUENTIAL as well,
            # dropping any other property discovered above)
            properties = SEQUENTIAL if (SEQUENTIAL in v or not k.is_Linear) else v
            # New properties are appended to those the Iteration already had
            properties = as_tuple(args.pop('properties')) + as_tuple(properties)
            mapper[k] = Iteration(properties=properties, **args)
        nodes = NestedTransformer(mapper).visit(nodes)

        return {'nodes': nodes}