Example #1
    def _make_copy(self, f, hse, key='', swap=False):
        buf_dims = []
        buf_indices = []
        for d in f.dimensions:
            if d not in hse.loc_indices:
                buf_dims.append(Dimension(name='buf_%s' % d.root))
                buf_indices.append(d.root)
        buf = Array(name='buf', dimensions=buf_dims, dtype=f.dtype)

        f_offsets = []
        f_indices = []
        for d in f.dimensions:
            offset = Symbol(name='o%s' % d.root)
            f_offsets.append(offset)
            f_indices.append(offset + (d.root if d not in hse.loc_indices else 0))

        if swap is False:
            eq = DummyEq(buf[buf_indices], f[f_indices])
            name = 'gather%s' % key
        else:
            eq = DummyEq(f[f_indices], buf[buf_indices])
            name = 'scatter%s' % key

        iet = Expression(eq)
        for i, d in reversed(list(zip(buf_indices, buf_dims))):
            # The -1 below is because an Iteration, by default, generates <=
            iet = Iteration(iet, i, d.symbolic_size - 1)
        iet = iet._rebuild(properties=PARALLEL)

        parameters = [buf] + list(buf.shape) + [f] + f_offsets
        return Callable(name, iet, 'void', parameters, ('static',))
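For intuition, the gather/scatter pair built above reduces to a strided region copy. Below is a minimal pure-numpy analogue, for illustration only: it assumes no fixed loc_indices dimensions (so buf and f have the same rank) and is not the C code Devito actually generates.

import numpy as np

def gather(buf, f, offsets):
    # swap=False case: copy the region of f anchored at `offsets`,
    # with extent buf.shape, into the contiguous buffer.
    region = tuple(slice(o, o + n) for o, n in zip(offsets, buf.shape))
    buf[:] = f[region]

def scatter(buf, f, offsets):
    # swap=True case: write the buffer back into the same region of f.
    region = tuple(slice(o, o + n) for o, n in zip(offsets, buf.shape))
    f[region] = buf

f = np.arange(16.).reshape(4, 4)
buf = np.empty((2, 2))
gather(buf, f, (1, 1))  # buf now holds f[1:3, 1:3]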
Example #2
def exprs(a, b):
    return [
        Expression(Eq(a, a + b + 5.)),
        Expression(Eq(a, b - a)),
        Expression(Eq(a, 4 * (b * a))),
        Expression(Eq(a, (6. / b) + (8. * a)))
    ]
Example #3
def exprs(dims):
    a = Array(name='a', shape=(3,), dimensions=(dims["i"],)).indexify()
    b = Array(name='b', shape=(3,), dimensions=(dims["i"],)).indexify()
    return [Expression(DummyEq(a, a + b + 5.)),
            Expression(DummyEq(a, b - a)),
            Expression(DummyEq(a, 4 * (b * a))),
            Expression(DummyEq(a, (6. / b) + (8. * a)))]
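A hedged sketch of how such fixtures are typically consumed: wrap the resulting Expressions in an IET node and query them with a visitor. Module paths vary across Devito versions; FindNodes and List are assumed importable from devito.ir.iet, and the fixture's own imports (Array, DummyEq, Expression) are assumed in scope.

from devito import Dimension
from devito.ir.iet import Expression, FindNodes, List

i = Dimension(name='i')
body = List(body=exprs({"i": i}))
assert len(FindNodes(Expression).visit(body)) == 4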
Example #4
    def _make_poke(self, hs, key, msgs):
        lflag = Symbol(name='lflag')
        gflag = Symbol(name='gflag')

        # Init flags
        body = [Expression(DummyEq(lflag, 0)), Expression(DummyEq(gflag, 1))]

        # For each msg, build an Iteration calling MPI_Test on all peers
        for msg in msgs:
            dim = Dimension(name='i')
            msgi = IndexedPointer(msg, dim)

            rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
            testrecv = Call(
                'MPI_Test',
                [rrecv, Byref(lflag),
                 Macro('MPI_STATUS_IGNORE')])

            rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
            testsend = Call(
                'MPI_Test',
                [rsend, Byref(lflag),
                 Macro('MPI_STATUS_IGNORE')])

            update = AugmentedExpression(DummyEq(gflag, lflag), '&')

            body.append(
                Iteration([testsend, update, testrecv, update], dim,
                          msg.npeers - 1))

        body.append(Return(gflag))

        return make_efunc('pokempi%d' % key, List(body=body), retval='int')
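The generated routine amounts to AND-ing MPI_Test completion flags across all outstanding send/receive requests. A rough mpi4py analogue, illustration only (the real code walks the C-level msg structs):

from mpi4py import MPI  # requests below are MPI.Request objects

def poke(requests):
    # gflag stays 1 only while every request has completed;
    # Request.Test is non-blocking, mirroring MPI_Test above.
    gflag = 1
    for req in requests:
        gflag &= int(req.Test())
    return gflag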
Example #5
File: utils.py  Project: nw0/devito
def copy_arrays(mapper, reverse=False):
    """
    Build an Iteration/Expression tree performing the copy ``k = v``, or
    ``v = k`` if reverse=True, for each (k, v) in mapper. (k, v) are expected
    to be of type :class:`IndexedData`. The loop bounds are inferred from
    the dimensions used in ``k``.
    """
    if not mapper:
        return ()

    # Build the Iteration tree for the copy
    iterations = []
    for k, v in mapper.items():
        handle = []
        indices = k.function.indices
        for i, j in zip(k.shape, indices):
            handle.append(Iteration([], dimension=j, limits=i))
        lhs, rhs = (v, k) if reverse else (k, v)
        handle.append(
            Expression(Eq(lhs[indices], rhs[indices]), dtype=k.function.dtype))
        iterations.append(compose_nodes(handle))

    # Maybe some Iterations are mergeable
    iterations = MergeOuterIterations().visit(iterations)

    return iterations
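Semantically, each (k, v) pair is an element-wise copy over the full index space of k. A plain-numpy analogue of what the generated loop nests compute (hypothetical helper, for illustration):

def copy_arrays_py(mapper, reverse=False):
    # Perform k = v, or v = k if reverse, for each pair in mapper,
    # with k and v numpy arrays of identical shape.
    for k, v in mapper.items():
        lhs, rhs = (v, k) if reverse else (k, v)
        lhs[...] = rhs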
Example #6
def iet_build(stree):
    """
    Construct an Iteration/Expression tree (IET) from a ScheduleTree.
    """
    nsections = 0
    queues = OrderedDict()
    for i in stree.visit():
        if i == stree:
            # We hit this handle at the very end of the visit
            return List(body=queues.pop(i))

        elif i.is_Exprs:
            exprs = [Increment(e) if e.is_Increment else Expression(e) for e in i.exprs]
            body = ExpressionBundle(i.ispace, i.ops, i.traffic, body=exprs)

        elif i.is_Conditional:
            body = Conditional(i.guard, queues.pop(i))

        elif i.is_Iteration:
            body = Iteration(queues.pop(i), i.dim, i.limits, direction=i.direction,
                             properties=i.properties, uindices=i.sub_iterators)

        elif i.is_Section:
            body = Section('section%d' % nsections, body=queues.pop(i))
            nsections += 1

        elif i.is_Halo:
            body = HaloSpot(i.halo_scheme, body=queues.pop(i))

        queues.setdefault(i.parent, []).append(body)

    assert False
Example #7
def iet_make(stree):
    """Create an IET from a ScheduleTree."""
    nsections = 0
    queues = OrderedDict()
    for i in stree.visit():
        if i == stree:
            # We hit this handle at the very end of the visit
            return List(body=queues.pop(i))

        elif i.is_Exprs:
            exprs = [Increment(e) if e.is_Increment else Expression(e) for e in i.exprs]
            body = ExpressionBundle(i.ispace, i.ops, i.traffic, body=exprs)

        elif i.is_Conditional:
            body = Conditional(i.guard, queues.pop(i))

        elif i.is_Iteration:
            # Order to ensure deterministic code generation
            uindices = sorted(i.sub_iterators, key=lambda d: d.name)
            # Generate Iteration
            body = Iteration(queues.pop(i), i.dim, i.limits, offsets=i.offsets,
                             direction=i.direction, properties=i.properties,
                             uindices=uindices)

        elif i.is_Section:
            body = Section('section%d' % nsections, body=queues.pop(i))
            nsections += 1

        elif i.is_Halo:
            body = HaloSpot(i.halo_scheme, body=queues.pop(i))

        queues.setdefault(i.parent, []).append(body)

    assert False
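Both iet_build and iet_make rely on the same bottom-up queueing idiom: nodes are visited children-first, each lowered body is parked in queues under its parent, and the root finally pops the fully assembled list. The pattern in isolation, with a hypothetical Node/visit pair (sketch only):

from collections import OrderedDict

def bottom_up(root, visit, lower):
    # `visit` yields nodes children-first, root last; `lower` turns a
    # node plus its already-lowered children into a new body.
    queues = OrderedDict()
    for node in visit(root):
        if node is root:
            return lower(root, queues.pop(root, []))
        body = lower(node, queues.pop(node, []))
        queues.setdefault(node.parent, []).append(body)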
Example #8
    def _make_copy(self, f, hse, key, swap=False):
        buf_dims = []
        buf_indices = []
        for d in f.dimensions:
            if d not in hse.loc_indices:
                buf_dims.append(Dimension(name='buf_%s' % d.root))
                buf_indices.append(d.root)
        buf = Array(name='buf', dimensions=buf_dims, dtype=f.dtype, padding=0)

        f_offsets = []
        f_indices = []
        for d in f.dimensions:
            offset = Symbol(name='o%s' % d.root)
            f_offsets.append(offset)
            f_indices.append(offset +
                             (d.root if d not in hse.loc_indices else 0))

        if swap is False:
            eq = DummyEq(buf[buf_indices], f[f_indices])
            name = 'gather_%s' % key
        else:
            eq = DummyEq(f[f_indices], buf[buf_indices])
            name = 'scatter_%s' % key

        iet = Expression(eq)
        for i, d in reversed(list(zip(buf_indices, buf_dims))):
            # The -1 below is because an Iteration, by default, generates <=
            iet = Iteration(iet,
                            i,
                            d.symbolic_size - 1,
                            properties=(PARALLEL, AFFINE))

        parameters = [buf] + list(buf.shape) + [f] + f_offsets
        return Callable(name, iet, 'void', parameters, ('static', ))
Example #9
def iet_make(stree):
    """
    Create an Iteration/Expression tree (IET) from a :class:`ScheduleTree`.
    """
    nsections = 0
    queues = OrderedDict()
    for i in stree.visit():
        if i == stree:
            # We hit this handle at the very end of the visit
            return List(body=queues.pop(i))

        elif i.is_Exprs:
            exprs = [Expression(e) for e in i.exprs]
            body = [ExpressionBundle(i.shape, i.ops, i.traffic, body=exprs)]

        elif i.is_Conditional:
            body = [Conditional(i.guard, queues.pop(i))]

        elif i.is_Iteration:
            # Order to ensure deterministic code generation
            uindices = sorted(i.sub_iterators, key=lambda d: d.name)
            # Generate Iteration
            body = [Iteration(queues.pop(i), i.dim, i.dim.limits, offsets=i.limits,
                              direction=i.direction, uindices=uindices)]

        elif i.is_Section:
            body = [Section('section%d' % nsections, body=queues.pop(i))]
            nsections += 1

        elif i.is_Halo:
            body = [HaloSpot(i.halo_scheme, body=queues.pop(i))]

        queues.setdefault(i.parent, []).extend(body)

    assert False
Example #10
def exprs(a, b, c, d, a_dense, b_dense):
    return [Expression(DummyEq(a, a + b + 5.)),
            Expression(DummyEq(a, b*d - a*c)),
            Expression(DummyEq(b, a + b*b + 3)),
            Expression(DummyEq(a, a*b*d*c)),
            Expression(DummyEq(a, 4 * ((b + d) * (a + c)))),
            Expression(DummyEq(a, (6. / b) + (8. * a))),
            Expression(DummyEq(a_dense, a_dense + b_dense + 5.))]
Example #11
    def _specialize_iet(self, iet, **kwargs):
        warning("The OPS backend is still work-in-progress")

        ops_init = Call(namespace['ops_init'], [0, 0, 2])
        ops_partition = Call(namespace['ops_partition'], Literal('""'))
        ops_exit = Call(namespace['ops_exit'])

        ops_block = OpsBlock('block')

        # Extract all symbols that need to be converted to ops_dat
        dims = []
        to_dat = set()
        for section, trees in find_affine_trees(iet).items():
            dims.append(len(trees[0].dimensions))
            symbols = set(FindSymbols('symbolics').visit(trees[0].root))
            symbols -= set(FindSymbols('defines').visit(trees[0].root))
            to_dat |= symbols

        # To ensure deterministic code generation we order the datasets to
        # be generated (since a set is an unordered collection)
        to_dat = filter_sorted(to_dat)

        name_to_ops_dat = {}
        pre_time_loop = []
        for f in to_dat:
            if f.is_Constant:
                continue

            pre_time_loop.extend(create_ops_dat(f, name_to_ops_dat, ops_block))

        for n, (section, trees) in enumerate(find_affine_trees(iet).items()):
            pre_loop, ops_kernel = opsit(trees, n)

            pre_time_loop.extend(pre_loop)
            self._ops_kernels.append(ops_kernel)

        assert all(d == dims[0] for d in dims), \
            ("The OPS backend currently assumes that all kernels"
             " have the same number of dimensions")

        ops_block_init = Expression(
            ClusterizedEq(
                Eq(ops_block,
                   namespace['ops_decl_block'](dims[0], Literal('"block"')))))

        self._headers.append(namespace['ops_define_dimension'](dims[0]))
        self._includes.append('stdio.h')

        body = [
            ops_init, ops_block_init, *pre_time_loop, ops_partition, iet,
            ops_exit
        ]

        return List(body=body)
Example #12
    def _schedule_expressions(self, clusters):
        """Create an Iteartion/Expression tree given an iterable of
        :class:`Cluster` objects."""

        # Build the Iteration/Expression tree
        processed = []
        schedule = OrderedDict()
        for i in clusters:
            # Build the Expression objects to be inserted within an Iteration tree
            expressions = [
                Expression(v, np.int32 if i.trace.is_index(k) else self.dtype)
                for k, v in i.trace.items()
            ]

            if not i.stencil.empty:
                root = None
                entries = i.stencil.entries

                # Can I reuse any of the previously scheduled Iterations ?
                index = 0
                for j0, j1 in zip(entries, list(schedule)):
                    if j0 != j1 or j0.dim in clusters.atomics[i]:
                        break
                    root = schedule[j1]
                    index += 1
                needed = entries[index:]

                # Build and insert the required Iterations
                iters = [
                    Iteration([], j.dim, j.dim.limits, offsets=j.ofs)
                    for j in needed
                ]
                body, tree = compose_nodes(iters + [expressions],
                                           retrieve=True)
                scheduling = OrderedDict(zip(needed, tree))
                if root is None:
                    processed.append(body)
                    schedule = scheduling
                else:
                    nodes = list(root.nodes) + [body]
                    mapper = {root: root._rebuild(nodes, **root.args_frozen)}
                    transformer = Transformer(mapper)
                    processed = list(transformer.visit(processed))
                    schedule = OrderedDict(
                        list(schedule.items())[:index] +
                        list(scheduling.items()))
                    for k, v in list(schedule.items()):
                        schedule[k] = transformer.rebuilt.get(v, v)
            else:
                # No Iterations are needed
                processed.extend(expressions)

        return List(body=processed)
Example #13
def make_grid_accesses(node):
    """
    Construct a new Iteration/Expression based on ``node``, in which all
    :class:`types.Indexed` accesses have been converted into YASK grid
    accesses.
    """
    def make_grid_gets(expr):
        mapper = {}
        indexeds = retrieve_indexed(expr)
        data_carriers = [i for i in indexeds if i.base.function.from_YASK]
        for i in data_carriers:
            name = namespace['code-grid-name'](i.base.function.name)
            args = [
                ListInitializer([INT(make_grid_gets(j)) for j in i.indices])
            ]
            mapper[i] = make_sharedptr_funcall(namespace['code-grid-get'],
                                               args, name)
        return expr.xreplace(mapper)

    mapper = {}
    for i, e in enumerate(FindNodes(Expression).visit(node)):
        lhs, rhs = e.expr.args

        # RHS translation
        rhs = make_grid_gets(rhs)

        # LHS translation
        if e.write.from_YASK:
            name = namespace['code-grid-name'](e.write.name)
            args = [rhs]
            args += [
                ListInitializer([INT(make_grid_gets(i)) for i in lhs.indices])
            ]
            handle = make_sharedptr_funcall(namespace['code-grid-put'], args,
                                            name)
            processed = ForeignExpression(handle,
                                          e.dtype,
                                          is_Increment=e.is_increment)
        else:
            # Writing to a scalar temporary
            processed = Expression(e.expr.func(lhs, rhs))

        mapper.update({e: processed})

    return Transformer(mapper).visit(node)
Example #14
def iet_make(stree):
    """
    Create an Iteration/Expression tree (IET) from a :class:`ScheduleTree`.
    """
    nsections = 0
    queues = OrderedDict()
    for i in stree.visit():
        if i == stree:
            # We hit this handle at the very end of the visit
            return List(body=queues.pop(i))

        elif i.is_Exprs:
            exprs = [Expression(e) for e in i.exprs]
            body = [ExpressionBundle(i.shape, i.ops, i.traffic, body=exprs)]

        elif i.is_Conditional:
            body = [Conditional(i.guard, queues.pop(i))]

        elif i.is_Iteration:
            # Generate `uindices`
            uindices = []
            for d, offs in i.sub_iterators:
                modulo = len(offs)
                for n, o in enumerate(filter_ordered(offs)):
                    value = (i.dim + o) % modulo
                    symbol = Scalar(name="%s%d" % (d.name, n), dtype=np.int32)
                    uindices.append(
                        UnboundedIndex(symbol, value, value, d, d + o))
            # Generate Iteration
            body = [
                Iteration(queues.pop(i),
                          i.dim,
                          i.dim.limits,
                          offsets=i.limits,
                          direction=i.direction,
                          uindices=uindices)
            ]

        elif i.is_Section:
            body = [Section('section%d' % nsections, body=queues.pop(i))]
            nsections += 1

        queues.setdefault(i.parent, []).extend(body)

    assert False
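The `(i.dim + o) % modulo` values implement circular (modulo) buffering, as used for alternating time buffers. A worked example with offsets (0, 1), hence modulo 2:

offs = (0, 1)
modulo = len(offs)
for t in range(4):
    t0, t1 = ((t + o) % modulo for o in offs)
    print(t, t0, t1)
# t=0: t0=0, t1=1; t=1: t0=1, t1=0; the two buffers alternate.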
Example #15
    def _make_copy(self, f, fixed, swap=False):
        """
        Construct a Callable performing a copy of:

            * an arbitrary convex region of ``f`` into a contiguous Array, OR
            * if ``swap=True``, a contiguous Array into an arbitrary convex
              region of ``f``.
        """
        buf_dims = []
        buf_indices = []
        for d in f.dimensions:
            if d not in fixed:
                buf_dims.append(Dimension(name='buf_%s' % d.root))
                buf_indices.append(d.root)
        buf = Array(name='buf', dimensions=buf_dims, dtype=f.dtype)

        f_offsets = []
        f_indices = []
        for d in f.dimensions:
            offset = Symbol(name='o%s' % d.root)
            f_offsets.append(offset)
            f_indices.append(offset + (d.root if d not in fixed else 0))

        if swap is False:
            eq = DummyEq(buf[buf_indices], f[f_indices])
            name = 'gather%dd' % f.ndim
        else:
            eq = DummyEq(f[f_indices], buf[buf_indices])
            name = 'scatter%dd' % f.ndim

        iet = Expression(eq)
        for i, d in reversed(list(zip(buf_indices, buf_dims))):
            # The -1 below is because an Iteration, by default, generates <=
            iet = Iteration(iet, i, d.symbolic_size - 1, properties=PARALLEL)
        iet = List(body=[ArrayCast(f), ArrayCast(buf), iet])

        # Optimize the memory copy with the DLE
        from devito.dle import transform
        state = transform(iet, 'simd', {'openmp': self._threaded})

        parameters = [buf] + list(buf.shape) + [f] + f_offsets + state.input
        return Callable(name, state.nodes, 'void', parameters,
                        ('static', )), state.input
Example #16
def copy(f, fixed, swap=False):
    """
    Construct a :class:`Callable` capable of copying: ::

        * an arbitrary convex region of ``f`` into a contiguous :class:`Array`, OR
        * if ``swap=True``, a contiguous :class:`Array` into an arbitrary convex
          region of ``f``.
    """
    buf_dims = []
    buf_indices = []
    for d in f.dimensions:
        if d not in fixed:
            buf_dims.append(Dimension(name='buf_%s' % d.root))
            buf_indices.append(d.root)
    buf = Array(name='buf', dimensions=buf_dims, dtype=f.dtype)

    dat_dims = []
    dat_offsets = []
    dat_indices = []
    for d in f.dimensions:
        dat_dims.append(Dimension(name='dat_%s' % d.root))
        offset = Symbol(name='o%s' % d.root)
        dat_offsets.append(offset)
        dat_indices.append(offset + (d.root if d not in fixed else 0))
    dat = Array(name='dat', dimensions=dat_dims, dtype=f.dtype)

    if swap is False:
        eq = DummyEq(buf[buf_indices], dat[dat_indices])
        name = 'gather_%s' % f.name
    else:
        eq = DummyEq(dat[dat_indices], buf[buf_indices])
        name = 'scatter_%s' % f.name

    iet = Expression(eq)
    for i, d in reversed(list(zip(buf_indices, buf_dims))):
        iet = Iteration(iet, i,
                        d.symbolic_size - 1)  # -1 as Iteration generates <=
    iet = List(body=[ArrayCast(dat), ArrayCast(buf), iet])
    parameters = [buf] + list(buf.shape) + [dat] + list(
        dat.shape) + dat_offsets
    return Callable(name, iet, 'void', parameters, ('static', ))
Example #17
def test_loops_collapsed(fe, t0, t1, t2, t3, exprs, expected, iters):
    scope = [fe, t0, t1, t2, t3]
    node_exprs = [Expression(DummyEq(EVAL(i, *scope))) for i in exprs]
    ast = iters[6](iters[7](iters[8](node_exprs)))

    ast = iet_analyze(ast)

    nodes = transform(ast, mode='openmp').nodes
    iterations = FindNodes(Iteration).visit(nodes)
    assert len(iterations) == len(expected)

    # Check for presence of pragma omp
    for i, j in zip(iterations, expected):
        pragmas = i.pragmas
        if j is True:
            assert len(pragmas) == 1
            pragma = pragmas[0]
            assert 'omp for collapse' in pragma.value
        else:
            for k in pragmas:
                assert 'omp for collapse' not in k.value
Example #18
    def test_iterations_ompized(self, fa, fb, fc, fd, t0, t1, t2, t3, exprs,
                                expected, iters):
        scope = [fa, fb, fc, fd, t0, t1, t2, t3]
        node_exprs = [Expression(DummyEq(EVAL(i, *scope))) for i in exprs]
        ast = iters[6](iters[7](node_exprs))

        ast = iet_analyze(ast)

        iet, _ = transform(ast, mode='openmp')
        iterations = FindNodes(Iteration).visit(iet)
        assert len(iterations) == len(expected)

        # Check for presence of pragma omp
        for i, j in zip(iterations, expected):
            pragmas = i.pragmas
            if j is True:
                assert len(pragmas) == 1
                pragma = pragmas[0]
                assert 'omp for' in pragma.value
            else:
                for k in pragmas:
                    assert 'omp for' not in k.value
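The same property can be exercised end-to-end through Devito's public API. A hedged sketch, assuming a recent Devito where the `opt` keyword accepts per-pass options and OpenMP support is available:

from devito import Grid, TimeFunction, Eq, Operator

grid = Grid(shape=(8, 8))
u = TimeFunction(name='u', grid=grid)
op = Operator(Eq(u.forward, u + 1), opt=('advanced', {'openmp': True}))
assert 'omp for' in str(op)  # the generated C source carries the pragma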
Example #19
    def test_conditional(self, fc):
        then_body = Expression(DummyEq(fc[x, y], fc[x, y] + 1))
        else_body = Expression(DummyEq(fc[x, y], fc[x, y] + 2))
        conditional = Conditional(x < 3, then_body, else_body)
        assert str(conditional) == """\
Example #20
def iet_make(clusters, dtype):
    """
    Create an Iteration/Expression tree (IET) given an iterable of :class:`Cluster`s.

    :param clusters: The iterable :class:`Cluster`s for which the IET is built.
    :param dtype: The data type of the scalar expressions.
    """
    processed = []
    schedule = OrderedDict()
    for cluster in clusters:
        if not cluster.ispace.empty:
            root = None
            intervals = cluster.ispace.intervals

            # Can I reuse any of the previously scheduled Iterations ?
            index = 0
            for i0, i1 in zip(intervals, list(schedule)):
                if i0 != i1 or i0.dim in clusters.atomics[cluster]:
                    break
                root = schedule[i1]
                index += 1
            needed = intervals[index:]

            # Build Iterations, including any necessary unbounded index
            iters = []
            for i in needed:
                uindices = []
                for j, offs in cluster.ispace.sub_iterators.get(i.dim, []):
                    for n, o in enumerate(filter_ordered(offs)):
                        name = "%s%d" % (j.name, n)
                        vname = Scalar(name=name, dtype=np.int32)
                        value = (i.dim + o) % j.modulo
                        uindices.append(UnboundedIndex(vname, value, value, j, j + o))
                iters.append(Iteration([], i.dim, i.dim.limits, offsets=i.limits,
                                       uindices=uindices))

            # Build Expressions
            exprs = [Expression(v, np.int32 if cluster.trace.is_index(k) else dtype)
                     for k, v in cluster.trace.items()]

            # Compose Iterations and Expressions
            body, tree = compose_nodes(iters + [exprs], retrieve=True)

            # Update the current scheduling
            scheduling = OrderedDict(zip(needed, tree))
            if root is None:
                processed.append(body)
                schedule = scheduling
            else:
                nodes = list(root.nodes) + [body]
                mapper = {root: root._rebuild(nodes, **root.args_frozen)}
                transformer = Transformer(mapper)
                processed = list(transformer.visit(processed))
                schedule = OrderedDict(list(schedule.items())[:index] +
                                       list(scheduling.items()))
                for k, v in list(schedule.items()):
                    schedule[k] = transformer.rebuilt.get(v, v)
        else:
            # No Iterations are needed
            processed.extend([Expression(e, dtype) for e in cluster.exprs])

    return List(body=processed)
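The Iteration-reuse test seen here (and in the variants above and below) is a longest-common-prefix computation between the incoming loop nest and the previously scheduled one, cut short at atomic dimensions. Distilled into a standalone sketch:

def reusable_prefix(new_intervals, scheduled, atomics):
    # Number of leading Iterations of the previous schedule that the
    # new cluster can be nested into.
    index = 0
    for i0, i1 in zip(new_intervals, scheduled):
        if i0 != i1 or i0.dim in atomics:
            break
        index += 1
    return index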
Example #21
    def _loop_fission(self, nodes, state):
        """
        Apply loop fission to innermost :class:`Iteration` objects. This pass
        is not applied if the number of statements in an Iteration's body is
        lower than ``self.thresholds['fission']``.
        """

        mapper = {}
        for tree in retrieve_iteration_tree(nodes):
            if len(tree) <= 1:
                # Heuristically avoided
                continue

            candidate = tree[-1]
            expressions = [e for e in candidate.nodes if e.is_Expression]

            if len(expressions) < self.thresholds['max_fission']:
                # Heuristically avoided
                continue
            if len(expressions) != len(candidate.nodes):
                # Dangerous for correctness
                continue

            functions = list(
                set.union(*[set(e.functions) for e in expressions]))
            wrapped = [e.expr for e in expressions]

            if not functions or not wrapped:
                # Heuristically avoided
                continue

            # Promote temporaries from scalar to tensors
            handle = functions[0]
            dim = handle.indices[-1]
            size = handle.shape[-1]
            if any(dim != i.indices[-1] for i in functions):
                # Dangerous for correctness
                continue

            wrapped = promote_scalar_expressions(wrapped, (size, ), (dim, ),
                                                 True)

            assert len(wrapped) == len(expressions)
            rebuilt = [
                Expression(s, e.dtype) for s, e in zip(wrapped, expressions)
            ]

            # Group statements
            # TODO: Need a heuristic here to maximize reuse
            args_frozen = candidate.args_frozen
            properties = as_tuple(args_frozen['properties']) + (ELEMENTAL, )
            args_frozen['properties'] = properties
            n = self.thresholds['min_fission']
            fissioned = [
                Iteration(g, **args_frozen) for g in grouper(rebuilt, n)
            ]

            mapper[candidate] = List(body=fissioned)

        processed = Transformer(mapper).visit(nodes)

        return processed, {}
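The `grouper(rebuilt, n)` call splits the promoted statements into chunks of at most n expressions, yielding one fissioned Iteration per chunk. A minimal grouper with that behaviour (Devito's actual helper may differ):

def grouper(iterable, n):
    # Split into consecutive chunks of length <= n.
    items = list(iterable)
    return [items[i:i + n] for i in range(0, len(items), n)]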
Example #22
def iet_make(clusters, dtype):
    """
    Create an Iteration/Expression tree (IET) given an iterable of :class:`Cluster`s.

    :param clusters: The iterable :class:`Cluster`s for which the IET is built.
    :param dtype: The data type of the scalar expressions.
    """
    processed = []
    schedule = OrderedDict()
    for cluster in clusters:
        if not cluster.ispace.empty:
            root = None
            intervals = cluster.ispace.intervals

            # Can I reuse any of the previously scheduled Iterations ?
            index = 0
            for i0, i1 in zip(intervals, list(schedule)):
                if i0 != i1 or i0.dim in cluster.atomics:
                    break
                root = schedule[i1]
                index += 1
            needed = intervals[index:]

            # Build Expressions
            body = [
                Expression(
                    e, np.int32 if cluster.trace.is_index(e.lhs) else dtype)
                for e in cluster.exprs
            ]
            if not needed:
                body = List(body=body)

            # Build Iterations
            scheduling = []
            for i in reversed(needed):
                # Prepare any necessary unbounded index
                uindices = []
                for j, offs in cluster.ispace.sub_iterators.get(i.dim, []):
                    modulo = len(offs)
                    for n, o in enumerate(filter_ordered(offs)):
                        name = "%s%d" % (j.name, n)
                        vname = Scalar(name=name, dtype=np.int32)
                        value = (i.dim + o) % modulo
                        uindices.append(
                            UnboundedIndex(vname, value, value, j, j + o))

                # Retrieve the iteration direction
                direction = cluster.ispace.directions[i.dim]

                # Update IET and scheduling
                if i.dim in cluster.guards:
                    # Must wrap within an if-then scope
                    body = Conditional(cluster.guards[i.dim], body)
                    iteration = Iteration(body,
                                          i.dim,
                                          i.dim.limits,
                                          offsets=i.limits,
                                          direction=direction,
                                          uindices=uindices)
                    # Adding (None, None) ensures that nested iterations won't
                    # be reused by the next cluster
                    scheduling.extend([(None, None), (i, iteration)])
                else:
                    iteration = Iteration(body,
                                          i.dim,
                                          i.dim.limits,
                                          offsets=i.limits,
                                          direction=direction,
                                          uindices=uindices)
                    scheduling.append((i, iteration))

                # Prepare for next dimension
                body = iteration

            # If /needed/ is != [], root.dim might be a guarded dimension for /cluster/
            if root is not None and root.dim in cluster.guards:
                body = Conditional(cluster.guards[root.dim], body)

            # Update the current schedule
            scheduling = OrderedDict(reversed(scheduling))
            if root is None:
                processed.append(body)
                schedule = scheduling
            else:
                nodes = list(root.nodes) + [body]
                mapper = {root: root._rebuild(nodes, **root.args_frozen)}
                transformer = Transformer(mapper)
                processed = list(transformer.visit(processed))
                schedule = OrderedDict(
                    list(schedule.items())[:index] + list(scheduling.items()))
                for k, v in list(schedule.items()):
                    schedule[k] = transformer.rebuilt.get(v, v)
        else:
            # No Iterations are needed
            processed.extend([Expression(e, dtype) for e in cluster.exprs])

    return List(body=processed)
Example #23
def iet_make(clusters):
    """
    Create an Iteration/Expression tree (IET) given an iterable of :class:`Cluster`s.

    :param clusters: The iterable :class:`Cluster`s for which the IET is built.
    """
    # {Iteration -> [c0, c1, ...]}, shared clusters
    shared = {}
    # The constructed IET
    processed = []
    # {Interval -> Iteration}, carried from preceding cluster
    schedule = OrderedDict()

    # Build IET
    for cluster in clusters:
        body = [Expression(e) for e in cluster.exprs]

        if cluster.ispace.empty:
            # No Iterations are needed
            processed.extend(body)
            continue

        root = None
        itintervals = cluster.ispace.iteration_intervals

        # Can I reuse any of the previously scheduled Iterations ?
        index = 0
        for i0, i1 in zip(itintervals, list(schedule)):
            if i0 != i1 or i0.dim in cluster.atomics:
                break
            root = schedule[i1]
            index += 1
        needed = itintervals[index:]

        # Build Expressions
        if not needed:
            body = List(body=body)

        # Build Iterations
        scheduling = []
        for i in reversed(needed):
            # Update IET and scheduling
            if i.dim in cluster.guards:
                # Must wrap within an if-then scope
                body = Conditional(cluster.guards[i.dim], body)
                # Adding (None, None) ensures that nested iterations won't
                # be reused by the next cluster
                scheduling.insert(0, (None, None))
            iteration = Iteration(body,
                                  i.dim,
                                  i.dim.limits,
                                  offsets=i.limits,
                                  direction=i.direction)
            scheduling.insert(0, (i, iteration))

            # Prepare for next dimension
            body = iteration

        # If /needed/ is != [], root.dim might be a guarded dimension for /cluster/
        if root is not None and root.dim in cluster.guards:
            body = Conditional(cluster.guards[root.dim], body)

        # Update the current schedule
        if root is None:
            processed.append(body)
        else:
            nodes = list(root.nodes) + [body]
            transf = Transformer(
                {root: root._rebuild(nodes, **root.args_frozen)})
            processed = list(transf.visit(processed))
            scheduling = list(schedule.items())[:index] + list(scheduling)
            scheduling = [(k, transf.rebuilt.get(v, v)) for k, v in scheduling]
            shared = {transf.rebuilt.get(k, k): v for k, v in shared.items()}
        schedule = OrderedDict(scheduling)

        # Record that /cluster/ was used to build the iterations in /schedule/
        shared.update(
            {i: shared.get(i, []) + [cluster]
             for i in schedule.values() if i})
    iet = List(body=processed)

    # Add in unbounded indices, if needed
    mapper = {}
    for k, v in shared.items():
        uindices = []
        ispace = IterationSpace.merge(*[i.ispace.project([k.dim]) for i in v])
        for j, offs in ispace.sub_iterators.get(k.dim, []):
            modulo = len(offs)
            for n, o in enumerate(filter_ordered(offs)):
                name = "%s%d" % (j.name, n)
                vname = Scalar(name=name, dtype=np.int32)
                value = (k.dim + o) % modulo
                uindices.append(UnboundedIndex(vname, value, value, j, j + o))
        mapper[k] = k._rebuild(uindices=uindices)
    iet = NestedTransformer(mapper).visit(iet)

    return iet
Example #24
    def _specialize_iet(self, iet, **kwargs):
        warning("The OPS backend is still work-in-progress")

        affine_trees = find_affine_trees(iet).items()

        # If there are no affine trees, there are no loops to optimize with OPS.
        if not affine_trees:
            return iet

        ops_init = Call(namespace['ops_init'], [0, 0, 2])
        ops_partition = Call(namespace['ops_partition'], Literal('""'))
        ops_exit = Call(namespace['ops_exit'])

        # Extract all symbols that need to be converted to ops_dat
        dims = []
        to_dat = set()
        for _, tree in affine_trees:
            dims.append(len(tree[0].dimensions))
            symbols = set(FindSymbols('symbolics').visit(tree[0].root))
            symbols -= set(FindSymbols('defines').visit(tree[0].root))
            to_dat |= symbols

        # Create the OPS block for this problem
        ops_block = OpsBlock('block')
        ops_block_init = Expression(
            ClusterizedEq(
                Eq(ops_block,
                   namespace['ops_decl_block'](dims[0], Literal('"block"')))))

        # To ensure deterministic code generation we order the datasets to
        # be generated (since a set is an unordered collection)
        to_dat = filter_sorted(to_dat)

        name_to_ops_dat = {}
        pre_time_loop = []
        after_time_loop = []
        for f in to_dat:
            if f.is_Constant:
                continue

            pre_time_loop.extend(
                list(create_ops_dat(f, name_to_ops_dat, ops_block)))
            # To return the result to Devito, it is necessary to copy the data
            # from the dat object back to the CPU memory.
            after_time_loop.extend(
                create_ops_fetch(f, name_to_ops_dat,
                                 self.time_dimension.extreme_max))

        # Generate ops kernels for each offloadable iteration tree
        mapper = {}
        for n, (_, tree) in enumerate(affine_trees):
            pre_loop, ops_kernel, ops_par_loop_call = opsit(
                tree, n, name_to_ops_dat, ops_block, dims[0])

            pre_time_loop.extend(pre_loop)
            self._func_table[namespace['ops_kernel_file'](ops_kernel.name)] = \
                MetaCall(ops_kernel, False)
            mapper[tree[0].root] = ops_par_loop_call
            mapper.update({i.root: mapper.get(i.root)
                           for i in tree})  # Drop trees

        iet = Transformer(mapper).visit(iet)

        assert all(d == dims[0] for d in dims), \
            ("The OPS backend currently assumes that all kernels"
             " have the same number of dimensions")

        self._headers.append(namespace['ops_define_dimension'](dims[0]))
        self._includes.extend(['stdio.h', 'ops_seq.h'])

        body = [
            ops_init, ops_block_init, *pre_time_loop, ops_partition, iet,
            *after_time_loop, ops_exit
        ]

        return List(body=body)
Example #25
def opsit(trees, count):
    node_factory = OPSNodeFactory()
    expressions = []
    for tree in trees:
        expressions.extend(FindNodes(Expression).visit(tree.inner))

    it_range = []
    it_dims = 0
    for tree in trees:
        if isinstance(tree, IterationTree):
            it_range = [it.bounds() for it in tree]
            it_dims = len(tree)

    block = OPSBlock(namespace['ops_block'](count))
    block_init = Element(
        cgen.Initializer(
            block, Call("ops_decl_block",
                        [it_dims, String(block.name)], False)))

    ops_expressions = []
    accesses = defaultdict(set)

    for i in reversed(expressions):
        extend_accesses(accesses, get_accesses(i.expr))
        ops_expressions.insert(0,
                               Expression(make_ops_ast(i.expr, node_factory)))

    ops_stencils_initializers, ops_stencils = generate_ops_stencils(accesses)

    to_remove = [
        f.name for f in FindSymbols('defines').visit(List(body=expressions))
    ]

    parameters = FindSymbols('symbolics').visit(List(body=ops_expressions))
    parameters = [
        p for p in parameters
        if p.name != 'OPS_ACC_size' and p.name not in to_remove
    ]
    parameters = sorted(parameters, key=lambda i: (i.is_Constant, i.name))

    arguments = FindSymbols('symbolics').visit(List(body=expressions))
    arguments = [a for a in arguments if a.name not in to_remove]
    arguments = sorted(arguments, key=lambda i: (i.is_Constant, i.name))

    ops_expressions = [
        Expression(fix_ops_acc(e.expr, [p.name for p in parameters]))
        for e in ops_expressions
    ]

    callable_kernel = Callable(namespace['ops_kernel'](count), ops_expressions,
                               "void", parameters)

    dat_declarations = []
    argname_to_dat = {}

    for a in arguments:
        if a.is_Constant:
            continue

        dat_dec, dat_sym = to_ops_dat(a, block)
        dat_declarations.extend(dat_dec)

        argname_to_dat.update(dat_sym)

    par_loop_range_arr = SymbolicArray(name=namespace['ops_range'](count),
                                       dimensions=(len(it_range) * 2, ),
                                       dtype=np.int32)
    range_vals = []
    for mn, mx in it_range:
        range_vals.append(mn)
        range_vals.append(mx)
    par_loop_range_init = Expression(
        ClusterizedEq(Eq(par_loop_range_arr, ListInitializer(range_vals))))

    ops_args = get_ops_args([p for p in parameters], ops_stencils,
                            argname_to_dat)

    par_loop = Call("ops_par_loop", [
        FunctionPointer(callable_kernel.name),
        String(callable_kernel.name), block, it_dims, par_loop_range_arr,
        *ops_args
    ])

    return (callable_kernel,
            [par_loop_range_init, block_init] + ops_stencils_initializers +
            dat_declarations + [Call("ops_partition", [String("")])],
            List(body=[par_loop]), it_dims)
Example #26
def to_ops_dat(function, block):
    ndim = function.ndim - (1 if function.is_TimeFunction else 0)
    dim = SymbolicArray(name="%s_dim" % function.name,
                        dimensions=(ndim, ),
                        dtype=np.int32)

    base = SymbolicArray(name="%s_base" % function.name,
                         dimensions=(ndim, ),
                         dtype=np.int32)

    d_p = SymbolicArray(name="%s_d_p" % function.name,
                        dimensions=(ndim, ),
                        dtype=np.int32)

    d_m = SymbolicArray(name="%s_d_m" % function.name,
                        dimensions=(ndim, ),
                        dtype=np.int32)

    res = []
    dats = {}
    ops_decl_dat_call = []

    if function.is_TimeFunction:
        time_pos = function._time_position
        time_index = function.indices[time_pos]
        time_dims = function.shape[time_pos]

        dim_shape = function.shape[:time_pos] + function.shape[time_pos + 1:]
        padding = function.padding[:time_pos] + function.padding[time_pos + 1:]
        halo = function.halo[:time_pos] + function.halo[time_pos + 1:]
        base_val = [0 for i in range(ndim)]
        d_p_val = tuple([p[0] + h[0] for p, h in zip(padding, halo)])
        d_m_val = tuple([-(p[1] + h[1]) for p, h in zip(padding, halo)])

        ops_dat_array = SymbolicArray(
            name="%s_dat" % function.name,
            dimensions=[time_dims],
            dtype="ops_dat",
        )

        ops_decl_dat_call.append(
            Element(
                cgen.Statement(
                    "%s %s[%s]" %
                    (ops_dat_array.dtype, ops_dat_array.name, time_dims))))

        for i in range(time_dims):
            access = FunctionTimeAccess(function, i)
            ops_dat_access = ArrayAccess(ops_dat_array, i)
            call = Call("ops_decl_dat", [
                block, 1, dim, base, d_m, d_p, access,
                String(function._C_typedata),
                String("%s%s%s" % (function.name, time_index, i))
            ], False)
            dats["%s%s%s" % (function.name, time_index, i)] = ArrayAccess(
                ops_dat_array, Symbol("%s%s" % (time_index, i)))
            ops_decl_dat_call.append(Element(cgen.Assign(ops_dat_access,
                                                         call)))
    else:
        ops_dat = OPSDat("%s_dat" % function.name)
        dats[function.name] = ops_dat

        d_p_val = tuple(
            [p[0] + h[0] for p, h in zip(function.padding, function.halo)])
        d_m_val = tuple(
            [-(p[1] + h[1]) for p, h in zip(function.padding, function.halo)])
        dim_shape = function.shape
        base_val = [0 for i in function.shape]

        ops_decl_dat_call.append(
            Element(
                cgen.Initializer(
                    ops_dat,
                    Call("ops_decl_dat", [
                        block, 1, dim, base, d_m, d_p,
                        FunctionTimeAccess(function, 0),
                        String(function._C_typedata),
                        String(function.name)
                    ], False))))

    res.append(Expression(ClusterizedEq(Eq(dim, ListInitializer(dim_shape)))))
    res.append(Expression(ClusterizedEq(Eq(base, ListInitializer(base_val)))))
    res.append(Expression(ClusterizedEq(Eq(d_p, ListInitializer(d_p_val)))))
    res.append(Expression(ClusterizedEq(Eq(d_m, ListInitializer(d_m_val)))))
    res.extend(ops_decl_dat_call)

    return res, dats
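The d_p/d_m extents are plain sums of padding and halo per dimension, with OPS's sign convention: positive on the "plus" side, negative on the "minus" side. A worked example with hypothetical values:

padding = [(2, 2), (2, 2)]  # hypothetical per-dimension (left, right)
halo = [(4, 4), (4, 4)]
d_p = tuple(p[0] + h[0] for p, h in zip(padding, halo))     # (6, 6)
d_m = tuple(-(p[1] + h[1]) for p, h in zip(padding, halo))  # (-6, -6)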
Example #27
    def test_conditional(self, fc, grid):
        x, y, _ = grid.dimensions
        then_body = Expression(DummyEq(fc[x, y], fc[x, y] + 1))
        else_body = Expression(DummyEq(fc[x, y], fc[x, y] + 2))
        conditional = Conditional(x < 3, then_body, else_body)
        assert str(conditional) == """\
Example #28
def make_ops_kernels(iet):
    warning("The OPS backend is still work-in-progress")

    affine_trees = find_affine_trees(iet).items()

    # If there are no affine trees, there are no loops to optimize with OPS.
    if not affine_trees:
        return iet, {}

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for _, tree in affine_trees:
        dims.append(len(tree[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(tree[0].root))
        symbols -= set(FindSymbols('defines').visit(tree[0].root))
        to_dat |= symbols

    # Create the OPS block for this problem
    ops_block = OpsBlock('block')
    ops_block_init = Expression(
        ClusterizedEq(
            Eq(ops_block, namespace['ops_decl_block'](dims[0],
                                                      Literal('"block"')))))

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    after_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue

        pre_time_loop.extend(
            list(create_ops_dat(f, name_to_ops_dat, ops_block)))
        # Copy data from device to host
        after_time_loop.extend(
            create_ops_fetch(f, name_to_ops_dat, f.grid.time_dim.extreme_max))

    # Generate ops kernels for each offloadable iteration tree
    mapper = {}
    ffuncs = []
    for n, (_, tree) in enumerate(affine_trees):
        pre_loop, ops_kernel, ops_par_loop_call = opsit(
            tree, n, name_to_ops_dat, ops_block, dims[0])

        pre_time_loop.extend(pre_loop)
        ffuncs.append(ops_kernel)
        mapper[tree[0].root] = ops_par_loop_call
        mapper.update({i.root: mapper.get(i.root) for i in tree})  # Drop trees

    iet = Transformer(mapper).visit(iet)

    assert all(d == dims[0] for d in dims), \
        ("The OPS backend currently assumes that all kernels"
         " have the same number of dimensions")

    iet = iet._rebuild(body=flatten([
        ops_init, ops_block_init, pre_time_loop, ops_partition, iet.body,
        after_time_loop, ops_exit
    ]))

    return iet, {
        'includes': ['stdio.h', 'ops_seq.h'],
        'ffuncs': ffuncs,
        'headers': [namespace['ops_define_dimension'](dims[0])]
    }