示例#1
0
    def emit_sequential_loop(self, codegen_state, iname, iname_dtype,
                             static_lbound, static_ubound, inner):
        """Return a ``cgen.For`` node iterating *iname* over an inclusive range.

        The loop header is assembled from plain strings: the loop variable is
        declared ``uniform`` with the type name that ``self.target`` reports
        for *iname_dtype*, initialized from *static_lbound*, tested with
        ``<=`` against *static_ubound*, and pre-incremented on every trip.

        :arg codegen_state: supplies ``expression_to_code_mapper``, which
            renders the bound expressions as code.
        :arg static_lbound: lower bound; converted with ``aff_to_expr``.
        :arg static_ubound: inclusive upper bound; converted likewise.
        :arg inner: handed to ``For`` unchanged as the loop body.
        """
        to_code = codegen_state.expression_to_code_mapper

        from loopy.symbolic import aff_to_expr

        from pymbolic.mapper.stringifier import PREC_NONE
        from cgen import For

        # Declaration plus start value of the loop variable.
        type_name = self.target.dtype_to_typename(iname_dtype)
        start_code = to_code(aff_to_expr(static_lbound), PREC_NONE, "i")
        init = "uniform %s %s = %s" % (type_name, iname, start_code)

        # The upper bound is inclusive, hence "<=".
        stop_code = to_code(aff_to_expr(static_ubound), PREC_NONE, "i")
        condition = "%s <= %s" % (iname, stop_code)

        step = "++%s" % iname

        return For(init, condition, step, inner)
示例#2
0
文件: __init__.py 项目: shigh/loopy
    def emit_sequential_loop(self, codegen_state, iname, iname_dtype,
                             static_lbound, static_ubound, inner):
        """Return a ``cgen.For`` node iterating *iname* over an inclusive range.

        The initializer is an ``InlineInitializer`` wrapping a ``POD``
        declaration of *iname*; the loop condition is a pymbolic
        ``Comparison`` (``iname <= static_ubound``) rendered through the
        expression-to-code mapper; the update is a pre-increment.

        :arg codegen_state: supplies ``expression_to_code_mapper``.
        :arg static_lbound: lower bound; converted with ``aff_to_expr``.
        :arg static_ubound: inclusive upper bound; converted likewise.
        :arg inner: handed to ``For`` unchanged as the loop body.
        """
        to_code = codegen_state.expression_to_code_mapper

        from loopy.symbolic import aff_to_expr

        from pymbolic import var
        from pymbolic.primitives import Comparison
        from pymbolic.mapper.stringifier import PREC_NONE
        from cgen import For, InlineInitializer

        # Declaration of the loop variable together with its start value.
        loop_var_decl = POD(self, iname_dtype, iname)
        start_code = to_code(aff_to_expr(static_lbound), PREC_NONE, "i")
        init = InlineInitializer(loop_var_decl, start_code)

        # The loop keeps running while iname <= upper bound.
        in_range = Comparison(var(iname), "<=", aff_to_expr(static_ubound))
        condition = to_code(in_range, PREC_NONE, "i")

        step = "++%s" % iname

        return For(init, condition, step, inner)
示例#3
0
def simplify_via_aff(expr):
    """Simplify *expr* by converting it to an isl affine expression and back.

    The names *expr* depends on become the dimensions of the isl space used
    for the conversion. They are sorted first: ``get_dependencies`` yields an
    unordered collection, and passing it along as-is would make the
    dimension order (and with it the shape of the returned expression)
    depend on set iteration order, which may differ from run to run.

    :arg expr: the expression to simplify; it is passed to
        ``aff_from_expr``, so it is presumably required to be affine.
    :returns: the expression obtained from ``aff_to_expr``.
    """
    from loopy.symbolic import aff_from_expr, aff_to_expr, get_dependencies

    # Deterministic dimension order for reproducible results.
    dep_names = sorted(get_dependencies(expr))
    space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT, dep_names)
    return aff_to_expr(aff_from_expr(space, expr))
示例#4
0
    def emit_sequential_loop(self, codegen_state, iname, iname_dtype,
            static_lbound, static_ubound, inner):
        """Return a ``cgen.For`` node iterating *iname* over an inclusive range.

        The loop header is assembled from plain strings: a declaration of
        *iname* using the type name that ``self.target`` reports for
        *iname_dtype*, a ``<=`` test against *static_ubound*, and a
        pre-increment.

        :arg codegen_state: supplies ``expression_to_code_mapper``.
        :arg static_lbound: lower bound; converted with ``aff_to_expr``.
        :arg static_ubound: inclusive upper bound; converted likewise.
        :arg inner: handed to ``For`` unchanged as the loop body.
        """
        to_code = codegen_state.expression_to_code_mapper

        from loopy.symbolic import aff_to_expr

        from pymbolic.mapper.stringifier import PREC_NONE
        from cgen import For

        # "<type> <iname> = <lower bound>"
        type_name = self.target.dtype_to_typename(iname_dtype)
        start_code = to_code(aff_to_expr(static_lbound), PREC_NONE, "i")
        init = "%s %s = %s" % (type_name, iname, start_code)

        # "<iname> <= <upper bound>": the upper bound is inclusive.
        stop_code = to_code(aff_to_expr(static_ubound), PREC_NONE, "i")
        condition = "%s <= %s" % (iname, stop_code)

        step = "++%s" % iname

        return For(init, condition, step, inner)
示例#5
0
文件: ispc.py 项目: shigh/loopy
    def emit_sequential_loop(self, codegen_state, iname, iname_dtype,
                             static_lbound, static_ubound, inner):
        """Return a ``cgen.For`` node iterating *iname* over an inclusive range.

        The loop variable is a ``POD`` declaration wrapped in
        ``ISPCUniform`` and initialized from *static_lbound*. The condition
        is a pymbolic comparison ``iname <= static_ubound`` rendered through
        the expression-to-code mapper; the update is a pre-increment.

        :arg codegen_state: supplies ``expression_to_code_mapper``.
        :arg static_lbound: lower bound; converted with ``aff_to_expr``.
        :arg static_ubound: inclusive upper bound; converted likewise.
        :arg inner: handed to ``For`` unchanged as the loop body.
        """
        to_code = codegen_state.expression_to_code_mapper

        from loopy.symbolic import aff_to_expr
        from loopy.target.c import POD

        from pymbolic.mapper.stringifier import PREC_NONE
        from cgen import For, Initializer

        from cgen.ispc import ISPCUniform

        # Uniform declaration of the loop variable plus its start value.
        uniform_decl = ISPCUniform(POD(self, iname_dtype, iname))
        start_code = to_code(aff_to_expr(static_lbound), PREC_NONE, "i")
        init = Initializer(uniform_decl, start_code)

        # The loop keeps running while iname <= upper bound.
        in_range = p.Comparison(var(iname), "<=", aff_to_expr(static_ubound))
        condition = to_code(in_range, PREC_NONE, "i")

        step = "++%s" % iname

        return For(init, condition, step, inner)
示例#6
0
文件: python.py 项目: dokempf/loopy
    def emit_sequential_loop(self, codegen_state, iname, iname_dtype,
            static_lbound, static_ubound, inner):
        """Return a ``genpy.For`` node looping *iname* over a ``range``.

        The iterable is the source text ``range(<lower>, <upper> + 1)``; the
        ``+ 1`` makes *static_ubound* inclusive. *iname_dtype* is not used
        here.

        :arg codegen_state: supplies ``expression_to_code_mapper``.
        :arg static_lbound: lower bound; converted with ``aff_to_expr``.
        :arg static_ubound: inclusive upper bound; converted likewise.
        :arg inner: handed to ``For`` unchanged as the loop body.
        """
        to_code = codegen_state.expression_to_code_mapper

        from loopy.symbolic import aff_to_expr

        from pymbolic.mapper.stringifier import PREC_NONE
        from genpy import For

        loop_vars = (iname,)

        start_code = to_code(aff_to_expr(static_lbound), PREC_NONE, "i")
        stop_code = to_code(aff_to_expr(static_ubound), PREC_NONE, "i")
        iterable = "range(%s, %s + 1)" % (start_code, stop_code)

        return For(loop_vars, iterable, inner)
示例#7
0
    def emit_sequential_loop(self, codegen_state, iname, iname_dtype,
            static_lbound, static_ubound, inner):
        """Build a ``genpy.For`` over ``range(lower, upper + 1)`` for *iname*.

        Both bounds are converted with ``aff_to_expr`` and rendered through
        ``codegen_state.expression_to_code_mapper``; adding one to the upper
        bound makes it inclusive. *iname_dtype* is accepted but unused, and
        *inner* becomes the loop body.
        """
        render = codegen_state.expression_to_code_mapper

        from loopy.symbolic import aff_to_expr

        from pymbolic.mapper.stringifier import PREC_NONE
        from genpy import For

        targets = (iname,)

        lower_src = render(aff_to_expr(static_lbound), PREC_NONE, "i")
        upper_src = render(aff_to_expr(static_ubound), PREC_NONE, "i")
        range_src = "range(%s, %s + 1)" % (lower_src, upper_src)

        return For(targets, range_src, inner)
示例#8
0
文件: __init__.py 项目: arghdos/loopy
 def get_constant_iname_length(self, iname):
     """Return the static maximum of the size of *iname* as an :class:`int`.

     The size is read from ``self.get_iname_bounds(iname,
     constants_only=True)`` and reduced with ``static_max_of_pw_aff``
     (again with ``constants_only=True``).
     """
     from loopy.isl_helpers import static_max_of_pw_aff
     from loopy.symbolic import aff_to_expr

     size_pw_aff = self.get_iname_bounds(iname, constants_only=True).size
     max_size = static_max_of_pw_aff(size_pw_aff, constants_only=True)
     return int(aff_to_expr(max_size))
示例#9
0
def test_aff_to_expr():
    """Smoke test: print a nested-modulo isl affine expression over the
    dimensions ``a`` and ``b``, then print its ``aff_to_expr`` conversion.
    """
    space = isl.Space.create_from_names(isl.Context(), ["a", "b"])
    local_space = isl.LocalSpace.from_space(space)
    zero = isl.Aff.zero_on_domain(local_space)
    one = zero.set_constant_val(1)  # noqa

    # Affine expressions that pick out the first and second set dimension.
    a = zero.set_coefficient_val(isl.dim_type.in_, 0, 1)
    b = zero.set_coefficient_val(isl.dim_type.in_, 1, 1)

    linear = 5 * a + 3 * b
    nested_mod = (linear % 17) % 5
    print(nested_mod)

    from loopy.symbolic import aff_to_expr
    print(aff_to_expr(nested_mod))
示例#10
0
def test_aff_to_expr():
    """Print ``(5*a + 3*b) % 17 % 5`` as an isl affine expression and as the
    expression produced by ``aff_to_expr``. Nothing is asserted.
    """
    dim_names = ["a", "b"]
    space = isl.Space.create_from_names(isl.Context(), dim_names)
    zero = isl.Aff.zero_on_domain(isl.LocalSpace.from_space(space))
    one = zero.set_constant_val(1)  # noqa

    # Unit coefficient on dimension 0 ("a") and on dimension 1 ("b").
    a = zero.set_coefficient_val(isl.dim_type.in_, 0, 1)
    b = zero.set_coefficient_val(isl.dim_type.in_, 1, 1)

    weighted_sum = 5*a + 3*b
    aff = (weighted_sum % 17) % 5
    print(aff)

    from loopy.symbolic import aff_to_expr
    print(aff_to_expr(aff))
示例#11
0
def simplify_via_aff(expr):
    """Simplify *expr* by converting it to an isl affine expression and back.

    The sorted dependency names of *expr* become the dimensions of the isl
    space used for the conversion. If the conversion raises
    ``ExpressionToAffineConversionError``, *expr* is returned unchanged.
    """
    from loopy.symbolic import aff_to_expr, guarded_aff_from_expr, get_dependencies
    from loopy.diagnostic import ExpressionToAffineConversionError

    dep_names = sorted(get_dependencies(expr))
    try:
        space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT, dep_names)
        affine = guarded_aff_from_expr(space, expr)
        return aff_to_expr(affine)
    except ExpressionToAffineConversionError:
        # Fall back to the input when it cannot be converted.
        return expr
示例#12
0
def determine_temporaries_to_promote(kernel, temporaries, name_gen):
    """
    Build a :class:`PromotedTemporary` for every temporary named in
    *temporaries* whose scope is not global.

    :returns: A :class:`dict` mapping temporary names from `temporaries` to
              :class:`PromotedTemporary` objects
    """
    promoted = {}

    def_lists, use_lists = get_def_and_use_lists_for_all_temporaries(kernel)

    from loopy.kernel.data import LocalIndexTag

    for tv_name in temporaries:
        tv = kernel.temporary_variables[tv_name]
        if tv.scope == temp_var_scope.GLOBAL:
            # Global temporaries are skipped.
            continue

        assert tv.base_storage is None, \
            "Cannot promote temporaries with base_storage to global"

        # Hardware inames common to everything that defines or uses the
        # temporary.
        accessing = def_lists[tv.name] + use_lists[tv.name]
        common_hw_inames = get_common_hw_inames(kernel, accessing)

        # Ordering by the tag's string form puts "g..." tags ahead of "l..."
        # tags, because 'g' sorts before 'l'.
        ordered_hw_inames = sorted(
            common_hw_inames,
            key=lambda iname: str(kernel.iname_to_tag[iname]))

        sizes = []
        kept_inames = []

        for iname in ordered_hw_inames:
            if (isinstance(kernel.iname_to_tag[iname], LocalIndexTag)
                    and tv.scope == temp_var_scope.LOCAL):
                # Local-tagged inames add no dimension for local temporaries.
                continue

            kept_inames.append(iname)

            from loopy.isl_helpers import static_max_of_pw_aff
            from loopy.symbolic import aff_to_expr

            iname_size = kernel.get_iname_bounds(iname).size
            sizes.append(aff_to_expr(static_max_of_pw_aff(iname_size, False)))

        promoted[tv.name] = PromotedTemporary(
            name=name_gen(tv.name),
            orig_temporary=tv,
            shape_prefix=tuple(sizes),
            hw_inames=kept_inames)

    return promoted
示例#13
0
def determine_temporaries_to_promote(kernel, temporaries, name_gen):
    """
    Describe, for each non-global temporary listed in *temporaries*, the
    :class:`PromotedTemporary` that backs it.

    :returns: A :class:`dict` mapping temporary names from `temporaries` to
              :class:`PromotedTemporary` objects
    """
    result = {}

    def_lists, use_lists = get_def_and_use_lists_for_all_temporaries(kernel)

    from loopy.kernel.data import LocalIndexTag

    def tag_sort_key(iname):
        # String form of the iname's tag; "g..." sorts before "l...".
        return str(kernel.iname_to_tag[iname])

    for name in temporaries:
        temp_var = kernel.temporary_variables[name]
        if temp_var.scope == temp_var_scope.GLOBAL:
            # Global temporaries are left alone.
            continue

        assert temp_var.base_storage is None, \
            "Cannot promote temporaries with base_storage to global"

        insns_touching = def_lists[temp_var.name] + use_lists[temp_var.name]
        hw_inames = sorted(
            get_common_hw_inames(kernel, insns_touching), key=tag_sort_key)

        prefix_dims = []
        retained_inames = []

        for iname in hw_inames:
            iname_tag = kernel.iname_to_tag[iname]
            if (isinstance(iname_tag, LocalIndexTag)
                    and temp_var.scope == temp_var_scope.LOCAL):
                # For local temporaries only non-local inames shape the
                # backing storage.
                continue

            retained_inames.append(iname)

            from loopy.isl_helpers import static_max_of_pw_aff
            from loopy.symbolic import aff_to_expr

            max_size = static_max_of_pw_aff(
                kernel.get_iname_bounds(iname).size, False)
            prefix_dims.append(aff_to_expr(max_size))

        result[temp_var.name] = PromotedTemporary(
            name=name_gen(temp_var.name),
            orig_temporary=temp_var,
            shape_prefix=tuple(prefix_dims),
            hw_inames=retained_inames)

    return result
示例#14
0
文件: loopy.py 项目: inducer/pytato
def _get_val_in_bset(bset: isl.BasicSet, idim: int) -> ScalarExpression:
    """
    Gets the value of *bset*'s *idim*-th set-dim in terms of its param-dims.

    .. note::

        Assumes all constraints in *bset* are equality constraints; the
        maximum and minimum of the dimension are asserted to be equal.

    :raises NotImplementedError: if the value consists of more than one
        piece.
    """
    from loopy.symbolic import aff_to_expr

    extremum = bset.dim_max(idim)

    # With equality constraints only, max and min must coincide.
    assert extremum.is_equal(bset.dim_min(idim))

    if extremum.n_piece() == 1:
        [(_, aff)] = extremum.get_pieces()
        return aff_to_expr(aff)

    raise NotImplementedError("Shape inference resulted in a piecewise"
                              " result.")
示例#15
0
def subst_into_pwaff(new_space, pwaff, subst_dict):
    """
    Returns an instance of :class:`islpy.PwAff` with substitutions from
    *subst_dict* substituted into *pwaff*.

    :arg pwaff: an instance of :class:`islpy.PwAff`
    :arg subst_dict: a mapping from parameters of *pwaff* to
        :class:`pymbolic.primitives.Expression` made up of terms comprising the
        parameters of *new_space*. The expression must be affine in the param
        dims of *new_space*.
    :raises ValueError: if every piece of *pwaff* has an empty domain after
        intersection with the substitution domain.
    """
    from pymbolic.mapper.substitutor import (SubstitutionMapper,
                                             make_subst_func)
    from loopy.symbolic import aff_from_expr, aff_to_expr

    # Number of parameters of the incoming pwaff, recorded before pwaff is
    # replaced below; exactly this many leading params are projected out of
    # each piece's domain.
    n_orig_params = pwaff.dim(dim_type.param)

    pwaff, subst_domain, subst_dict = get_param_subst_domain(
        new_space, pwaff, subst_dict)
    substitute = SubstitutionMapper(make_subst_func(subst_dict))

    substituted_pieces = []
    for piece_domain, piece_aff in pwaff.get_pieces():
        restricted = piece_domain & subst_domain
        if restricted.plain_is_empty():
            continue

        restricted = restricted.project_out(dim_type.param, 0, n_orig_params)
        new_expr = substitute(aff_to_expr(piece_aff))
        new_aff = aff_from_expr(restricted.space, new_expr)

        substituted_pieces.append(isl.PwAff.alloc(restricted, new_aff))

    if not substituted_pieces:
        raise ValueError("no pieces of PwAff survived the substitution")

    # Fold the pieces together, left to right.
    combined = substituted_pieces[0]
    for piece in substituted_pieces[1:]:
        combined = combined.union_add(piece)

    return combined.coalesce()
示例#16
0
 def get_constant_iname_length(self, iname):
     """Return, as an :class:`int`, the static maximum of the size of *iname*.

     Both the bounds lookup and ``static_max_of_pw_aff`` are called with
     ``constants_only=True``.
     """
     from loopy.isl_helpers import static_max_of_pw_aff
     from loopy.symbolic import aff_to_expr

     bounds = self.get_iname_bounds(iname, constants_only=True)
     length_aff = static_max_of_pw_aff(bounds.size, constants_only=True)
     return int(aff_to_expr(length_aff))
示例#17
0
def test_aff_to_expr_2():
    """Check that ``aff_to_expr`` turns an isl ``floor`` term into pymbolic
    floor division.
    """
    from loopy.symbolic import aff_to_expr
    aff = isl.Aff("[n] -> { [i0] -> [(-i0 + 2*floor((i0)/2))] }")

    from pymbolic import var
    i0 = var("i0")

    converted = aff_to_expr(aff)
    expected = (-1) * i0 + 2 * (i0 // 2)
    assert converted == expected
示例#18
0
def test_aff_to_expr_2():
    """``aff_to_expr`` of ``-i0 + 2*floor(i0/2)`` must equal the pymbolic
    expression ``(-1)*i0 + 2*(i0 // 2)``.
    """
    from loopy.symbolic import aff_to_expr
    isl_aff = isl.Aff("[n] -> { [i0] -> [(-i0 + 2*floor((i0)/2))] }")

    from pymbolic import var
    i0 = var("i0")

    result = aff_to_expr(isl_aff)
    reference = (-1)*i0 + 2*(i0 // 2)
    assert result == reference
示例#19
0
文件: loop.py 项目: navjotk/loopy
def generate_sequential_loop_dim_code(kernel, sched_index, codegen_state):
    """Generate code for the sequential loop entered at *sched_index*.

    The loop iname is read from ``kernel.schedule[sched_index]``. For every
    slab returned by :func:`get_slab_decomposition`, static lower and upper
    bounds of the iname are computed, the inner loop nest is built, and one
    of two things is emitted: a constant initializer followed by the inner
    code (when the bounds coincide), or the loop produced by
    ``kernel.target.emit_sequential_loop``.

    :returns: the result of :func:`gen_code_block` applied to the per-slab
        pieces.
    """
    ecm = codegen_state.expression_to_code_mapper
    loop_iname = kernel.schedule[sched_index].iname

    slabs = get_slab_decomposition(
            kernel, loop_iname, sched_index, codegen_state)

    from loopy.codegen.bounds import get_usable_inames_for_conditional

    # Note: this does not include loop_iname itself!
    usable_inames = get_usable_inames_for_conditional(kernel, sched_index)
    domain = kernel.get_inames_domain(loop_iname)

    result = []

    for slab_name, slab in slabs:
        # A descriptive comment is only emitted when there are several slabs.
        cmt = "%s slab for '%s'" % (slab_name, loop_iname)
        if len(slabs) == 1:
            cmt = None

        # {{{ find bounds

        aligned_domain = isl.align_spaces(domain, slab, across_dim_types=True,
                obj_bigger_ok=True)

        dom_and_slab = aligned_domain & slab

        assumptions_non_param = isl.BasicSet.from_params(kernel.assumptions)
        dom_and_slab, assumptions_non_param = isl.align_two(
                dom_and_slab, assumptions_non_param)
        dom_and_slab = dom_and_slab & assumptions_non_param

        # move inames that are usable into parameters
        moved_inames = []
        for iname in dom_and_slab.get_var_names(dim_type.set):
            if iname in usable_inames:
                moved_inames.append(iname)
                dt, idx = dom_and_slab.get_var_dict()[iname]
                dom_and_slab = dom_and_slab.move_dims(
                        dim_type.param, dom_and_slab.dim(dim_type.param),
                        dt, idx, 1)

        _, loop_iname_idx = dom_and_slab.get_var_dict()[loop_iname]

        from loopy.isl_helpers import (
                static_min_of_pw_aff,
                static_max_of_pw_aff)

        lbound = (
                kernel.cache_manager.dim_min(
                    dom_and_slab, loop_iname_idx)
                .gist(kernel.assumptions)
                .coalesce())
        ubound = (
            kernel.cache_manager.dim_max(
                dom_and_slab, loop_iname_idx)
            .gist(kernel.assumptions)
            .coalesce())

        static_lbound = static_min_of_pw_aff(
                lbound,
                constants_only=False)
        static_ubound = static_max_of_pw_aff(
                ubound,
                constants_only=False)

        # }}}

        # {{{ find implemented slab, build inner code

        from loopy.isl_helpers import make_slab_from_bound_pwaffs

        # impl_slab may be overapproximated
        impl_slab = make_slab_from_bound_pwaffs(
                dom_and_slab.space,
                loop_iname, static_lbound, static_ubound)

        # Undo the set->param move from above so impl_slab is expressed in
        # terms of set dimensions again.
        for iname in moved_inames:
            dt, idx = impl_slab.get_var_dict()[iname]
            impl_slab = impl_slab.move_dims(
                    dim_type.set, impl_slab.dim(dim_type.set),
                    dt, idx, 1)

        new_codegen_state = codegen_state.intersect(impl_slab)

        # The slab belongs to loop_iname, so that is the iname passed on
        # here (not the leftover 'iname' variable of the loops above, which
        # may name a different iname).
        inner = build_loop_nest(
                intersect_kernel_with_slab(
                    kernel, slab, loop_iname),
                sched_index+1, new_codegen_state)

        # }}}

        if cmt is not None:
            from cgen import Comment
            result.append(Comment(cmt))

        from cgen import Initializer, POD, Const, Line
        from loopy.symbolic import aff_to_expr

        if (static_ubound - static_lbound).plain_is_zero():
            # single-trip, generate just a variable assignment, not a loop
            result.append(gen_code_block([
                Initializer(Const(POD(kernel.index_dtype, loop_iname)),
                    ecm(aff_to_expr(static_lbound), PREC_NONE, "i")),
                Line(),
                inner,
                ]))

        else:
            result.append(
                kernel.target.emit_sequential_loop(
                       codegen_state, loop_iname, kernel.index_dtype,
                       static_lbound, static_ubound, inner))

    return gen_code_block(result)
示例#20
0
def determine_temporaries_to_promote(kernel, temporaries, name_gen):
    """
    Construct, for each temporary in *temporaries*, a
    :class:`PromotedTemporary` describing how that temporary should be
    promoted into global storage. Temporaries that are already global are
    skipped.

    :returns: A :class:`dict` mapping temporary names from `temporaries` to
              :class:`PromotedTemporary` objects
    """
    promoted = {}

    def_lists, use_lists = get_def_and_use_lists_for_all_temporaries(kernel)

    from loopy.kernel.data import LocalIndexTag

    def tag_sort_key(iname):
        # String form of the iname's tag; 'g' sorts before 'l', so group
        # tags come out ahead of local tags.
        return str(kernel.iname_to_tag[iname])

    for tv_name in temporaries:
        tv = kernel.temporary_variables[tv_name]
        if tv.scope == temp_var_scope.GLOBAL:
            # Global temporaries need no promotion.
            continue

        assert tv.base_storage is None, \
            "Cannot promote temporaries with base_storage to global"

        # The hw-parallel tagged inames associated with this temporary
        # determine the shape of the global storage used to save and restore
        # it across kernel calls.
        #
        # TODO: Make a policy decision about which dimensions to use. For
        # now, every instruction that defines or uses the temporary is
        # examined, and the hw-parallel tagged inames common to all of them
        # are taken.
        #
        # For local temporaries, hw-local tagged inames do not contribute to
        # the global storage shape (see the loop below).
        accessing_insns = def_lists[tv.name] + use_lists[tv.name]
        ordered_hw_inames = sorted(
            get_common_hw_inames(kernel, accessing_insns), key=tag_sort_key)

        # Sizes of the dimensions prepended to the temporary's global
        # storage, and the inames they belong to.
        leading_sizes = []
        storage_inames = []

        for iname in ordered_hw_inames:
            if (isinstance(kernel.iname_to_tag[iname], LocalIndexTag)
                    and tv.scope == temp_var_scope.LOCAL):
                # Only group inames shape the storage of local temporaries.
                continue

            storage_inames.append(iname)

            from loopy.isl_helpers import static_max_of_pw_aff
            from loopy.symbolic import aff_to_expr

            iname_size = kernel.get_iname_bounds(iname).size
            leading_sizes.append(
                aff_to_expr(static_max_of_pw_aff(iname_size, False)))

        promoted[tv.name] = PromotedTemporary(
            name=name_gen(tv.name),
            orig_temporary=tv,
            shape_prefix=tuple(leading_sizes),
            hw_inames=storage_inames)

    return promoted
示例#21
0
    def auto_promote_temporary(self, temporary_name):
        """Describe the save slot used to promote *temporary_name* to global
        storage.

        :returns: a ``self.PromotedTemporary``, or *None* if the temporary is
            already global or has an initializer.
        :raises ValueError: if the temporary has ``base_storage`` set.
        """
        tv = self.kernel.temporary_variables[temporary_name]

        if tv.scope == temp_var_scope.GLOBAL:
            # Global temporaries need no promotion.
            return None

        if tv.initializer is not None:
            # No saving/reloading is needed for temporaries with
            # initializers: code generation emits the initializers itself.
            assert tv.read_only
            return None

        if tv.base_storage is not None:
            raise ValueError(
                "Cannot promote temporaries with base_storage to global")

        # The hw-parallel tagged inames associated with this temporary
        # determine the shape of the global storage used to save and restore
        # it across kernel calls.
        #
        # TODO: Make a policy decision about which dimensions to use. For
        # now, every instruction that reads or writes the temporary is
        # examined, and the hw-parallel tagged inames common to all of them
        # are taken.
        #
        # For local temporaries, hw-local tagged inames do not contribute to
        # the global storage shape (see the loop below).
        accessing_insns = self.insn_query.insns_reading_or_writing(tv.name)
        common_hw_inames = self.insn_query.common_hw_inames(accessing_insns)

        def tag_sort_key(iname):
            return str(self.kernel.iname_to_tag[iname])

        # Desired order: g.0 < g.1 < ... < l.0 < l.1 < ...
        # Sorting the tag strings lexicographically yields exactly that.
        ordered_hw_inames = sorted(common_hw_inames, key=tag_sort_key)

        # Sizes of the dimensions prepended to the temporary's global
        # storage, and the inames they belong to.
        hw_sizes = []
        storage_inames = []

        for iname in ordered_hw_inames:
            iname_tag = self.kernel.iname_to_tag[iname]
            from loopy.kernel.data import LocalIndexTag
            if (isinstance(iname_tag, LocalIndexTag)
                    and tv.scope == temp_var_scope.LOCAL):
                # Only group inames shape the storage of local temporaries.
                continue

            storage_inames.append(iname)

            from loopy.isl_helpers import static_max_of_pw_aff
            from loopy.symbolic import aff_to_expr

            iname_size = self.kernel.get_iname_bounds(iname).size
            hw_sizes.append(
                aff_to_expr(static_max_of_pw_aff(iname_size, False)))

        remaining_shape = tv.shape

        if len(remaining_shape) == 0 and not hw_sizes:
            # A scalar without hardware dimensions still gets one dimension.
            remaining_shape = (1, )

        return self.PromotedTemporary(
            name=self.var_name_gen(tv.name + "_save_slot"),
            orig_temporary=tv,
            hw_dims=tuple(hw_sizes),
            non_hw_dims=remaining_shape,
            hw_inames=storage_inames)
示例#22
0
def generate_sequential_loop_dim_code(codegen_state, sched_index):
    """Generate code for the sequential loop entered at *sched_index*.

    The kernel comes from ``codegen_state.kernel`` and the loop iname from
    its schedule item at *sched_index*. For every slab returned by
    :func:`get_slab_decomposition`, static lower and upper bounds of the
    iname are computed, the inner loop nest is built, and one of two things
    is emitted through ``codegen_state.ast_builder``: a constant initializer
    followed by the inner code (when the bounds coincide), or a sequential
    loop wrapping the inner AST.

    :returns: the result of :func:`merge_codegen_results` applied to the
        per-slab pieces.
    """
    kernel = codegen_state.kernel

    ecm = codegen_state.expression_to_code_mapper
    loop_iname = kernel.schedule[sched_index].iname

    slabs = get_slab_decomposition(kernel, loop_iname)

    from loopy.codegen.bounds import get_usable_inames_for_conditional

    # Note: this does not include loop_iname itself!
    usable_inames = get_usable_inames_for_conditional(kernel, sched_index)
    domain = kernel.get_inames_domain(loop_iname)

    result = []

    for slab_name, slab in slabs:
        # A descriptive comment is only emitted when there are several slabs.
        cmt = "%s slab for '%s'" % (slab_name, loop_iname)
        if len(slabs) == 1:
            cmt = None

        # {{{ find bounds

        aligned_domain = isl.align_spaces(domain,
                                          slab,
                                          across_dim_types=True,
                                          obj_bigger_ok=True)

        dom_and_slab = aligned_domain & slab

        assumptions_non_param = isl.BasicSet.from_params(kernel.assumptions)
        dom_and_slab, assumptions_non_param = isl.align_two(
            dom_and_slab, assumptions_non_param)
        dom_and_slab = dom_and_slab & assumptions_non_param

        # move inames that are usable into parameters
        moved_inames = []
        for iname in dom_and_slab.get_var_names(dim_type.set):
            if iname in usable_inames:
                moved_inames.append(iname)
                dt, idx = dom_and_slab.get_var_dict()[iname]
                dom_and_slab = dom_and_slab.move_dims(
                    dim_type.param, dom_and_slab.dim(dim_type.param), dt, idx,
                    1)

        _, loop_iname_idx = dom_and_slab.get_var_dict()[loop_iname]

        from loopy.isl_helpers import (static_min_of_pw_aff,
                                       static_max_of_pw_aff)

        lbound = (kernel.cache_manager.dim_min(
            dom_and_slab, loop_iname_idx).gist(kernel.assumptions).coalesce())
        ubound = (kernel.cache_manager.dim_max(
            dom_and_slab, loop_iname_idx).gist(kernel.assumptions).coalesce())

        static_lbound = static_min_of_pw_aff(lbound, constants_only=False)
        static_ubound = static_max_of_pw_aff(ubound, constants_only=False)

        # }}}

        # {{{ find implemented slab, build inner code

        from loopy.isl_helpers import make_slab_from_bound_pwaffs

        # impl_slab may be overapproximated
        impl_slab = make_slab_from_bound_pwaffs(dom_and_slab.space, loop_iname,
                                                static_lbound, static_ubound)

        # Undo the set->param move from above so impl_slab is expressed in
        # terms of set dimensions again.
        for iname in moved_inames:
            dt, idx = impl_slab.get_var_dict()[iname]
            impl_slab = impl_slab.move_dims(dim_type.set,
                                            impl_slab.dim(dim_type.set), dt,
                                            idx, 1)

        # The slab belongs to loop_iname, so that is the iname passed on
        # here (not the leftover 'iname' variable of the loops above, which
        # may name a different iname).
        new_codegen_state = (codegen_state.intersect(impl_slab).copy(
            kernel=intersect_kernel_with_slab(kernel, slab, loop_iname)))

        inner = build_loop_nest(new_codegen_state, sched_index + 1)

        # }}}

        if cmt is not None:
            result.append(codegen_state.ast_builder.emit_comment(cmt))

        from loopy.symbolic import aff_to_expr

        astb = codegen_state.ast_builder

        if (static_ubound - static_lbound).plain_is_zero():
            # single-trip, generate just a variable assignment, not a loop
            result.append(
                merge_codegen_results(codegen_state, [
                    astb.emit_initializer(codegen_state,
                                          kernel.index_dtype,
                                          loop_iname,
                                          ecm(aff_to_expr(static_lbound),
                                              PREC_NONE, "i"),
                                          is_const=True),
                    astb.emit_blank_line(),
                    inner,
                ]))

        else:
            # Wrap the inner AST in the loop and keep the rest of the inner
            # codegen result.
            inner_ast = inner.current_ast(codegen_state)
            result.append(
                inner.with_new_ast(
                    codegen_state,
                    astb.emit_sequential_loop(codegen_state, loop_iname,
                                              kernel.index_dtype,
                                              static_lbound, static_ubound,
                                              inner_ast)))

    return merge_codegen_results(codegen_state, result)
示例#23
0
def simplify_via_aff(expr):
    """Simplify *expr* by converting it to an isl affine expression and back.

    The names *expr* depends on become the dimensions of the isl space used
    for the conversion. They are sorted first: ``get_dependencies`` yields an
    unordered collection, and passing it along as-is would make the
    dimension order (and with it the shape of the returned expression)
    depend on set iteration order, which may differ from run to run.

    :arg expr: the expression to simplify; it is passed to
        ``aff_from_expr``, so it is presumably required to be affine.
    :returns: the expression obtained from ``aff_to_expr``.
    """
    from loopy.symbolic import aff_from_expr, aff_to_expr, get_dependencies

    # Deterministic dimension order for reproducible results.
    dep_names = sorted(get_dependencies(expr))
    space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT, dep_names)
    return aff_to_expr(aff_from_expr(space, expr))