Пример #1
0
    def process_set(s):
        var_dict = s.get_var_dict()

        try:
            dt, idx = var_dict[name]
        except KeyError:
            return s

        value_aff = isl.Aff.zero_on_domain(s.space) + value

        from loopy.isl_helpers import iname_rel_aff

        name_equal_value_aff = iname_rel_aff(s.space, name, "==", value_aff)

        s = s.add_constraint(isl.Constraint.equality_from_aff(name_equal_value_aff)).project_out(dt, idx, 1)

        return s
Пример #2
0
    def process_set(s):
        var_dict = s.get_var_dict()

        try:
            dt, idx = var_dict[name]
        except KeyError:
            return s

        value_aff = isl.Aff.zero_on_domain(s.space) + value

        from loopy.isl_helpers import iname_rel_aff
        name_equal_value_aff = iname_rel_aff(s.space, name, "==", value_aff)

        s = (s.add_constraint(
            isl.Constraint.equality_from_aff(
                name_equal_value_aff)).project_out(dt, idx, 1))

        return s
Пример #3
0
    def fix(self, iname, aff):
        new_impl_domain = self.implemented_domain

        impl_space = self.implemented_domain.get_space()
        if iname not in impl_space.get_var_dict():
            new_impl_domain = (new_impl_domain.add_dims(
                isl.dim_type.set,
                1).set_dim_name(isl.dim_type.set,
                                new_impl_domain.dim(isl.dim_type.set), iname))
            impl_space = new_impl_domain.get_space()

        from loopy.isl_helpers import iname_rel_aff
        iname_plus_lb_aff = iname_rel_aff(impl_space, iname, "==", aff)

        from loopy.symbolic import pw_aff_to_expr
        cns = isl.Constraint.equality_from_aff(iname_plus_lb_aff)
        expr = pw_aff_to_expr(aff)

        new_impl_domain = new_impl_domain.add_constraint(cns)
        return self.copy_and_assign(
            iname, expr).copy(implemented_domain=new_impl_domain)
Пример #4
0
    def fix(self, iname, aff):
        new_impl_domain = self.implemented_domain

        impl_space = self.implemented_domain.get_space()
        if iname not in impl_space.get_var_dict():
            new_impl_domain = new_impl_domain.add_dims(isl.dim_type.set, 1).set_dim_name(
                isl.dim_type.set, new_impl_domain.dim(isl.dim_type.set), iname
            )
            impl_space = new_impl_domain.get_space()

        from loopy.isl_helpers import iname_rel_aff

        iname_plus_lb_aff = iname_rel_aff(impl_space, iname, "==", aff)

        from loopy.symbolic import pw_aff_to_expr

        cns = isl.Constraint.equality_from_aff(iname_plus_lb_aff)
        expr = pw_aff_to_expr(aff)

        new_impl_domain = new_impl_domain.add_constraint(cns)
        return self.copy_and_assign(iname, expr).copy(implemented_domain=new_impl_domain)
Пример #5
0
def get_slab_decomposition(kernel, iname):
    iname_domain = kernel.get_inames_domain(iname)

    if iname_domain.is_empty():
        return ()

    space = iname_domain.space

    lower_incr, upper_incr = kernel.iname_slab_increments.get(iname, (0, 0))
    lower_bulk_bound = None
    upper_bulk_bound = None

    if lower_incr or upper_incr:
        bounds = kernel.get_iname_bounds(iname)

        lower_bound_pw_aff_pieces = bounds.lower_bound_pw_aff.coalesce(
        ).get_pieces()
        upper_bound_pw_aff_pieces = bounds.upper_bound_pw_aff.coalesce(
        ).get_pieces()

        if len(lower_bound_pw_aff_pieces) > 1:
            raise NotImplementedError(
                "lower bound for slab decomp of '%s' needs "
                "conditional/has more than one piece" % iname)
        if len(upper_bound_pw_aff_pieces) > 1:
            raise NotImplementedError(
                "upper bound for slab decomp of '%s' needs "
                "conditional/has more than one piece" % iname)

        (_, lower_bound_aff), = lower_bound_pw_aff_pieces
        (_, upper_bound_aff), = upper_bound_pw_aff_pieces

        from loopy.isl_helpers import iname_rel_aff

        if lower_incr:
            assert lower_incr > 0
            lower_slab = ("initial",
                          isl.BasicSet.universe(space).add_constraint(
                              isl.Constraint.inequality_from_aff(
                                  iname_rel_aff(space, iname, "<",
                                                lower_bound_aff +
                                                lower_incr))))
            lower_bulk_bound = (isl.Constraint.inequality_from_aff(
                iname_rel_aff(space, iname, ">=",
                              lower_bound_aff + lower_incr)))
        else:
            lower_slab = None

        if upper_incr:
            assert upper_incr > 0
            upper_slab = ("final", isl.BasicSet.universe(space).add_constraint(
                isl.Constraint.inequality_from_aff(
                    iname_rel_aff(space, iname, ">",
                                  upper_bound_aff - upper_incr))))
            upper_bulk_bound = (isl.Constraint.inequality_from_aff(
                iname_rel_aff(space, iname, "<=",
                              upper_bound_aff - upper_incr)))
        else:
            upper_slab = None

        slabs = []

        bulk_slab = isl.BasicSet.universe(space)
        if lower_bulk_bound is not None:
            bulk_slab = bulk_slab.add_constraint(lower_bulk_bound)
        if upper_bulk_bound is not None:
            bulk_slab = bulk_slab.add_constraint(upper_bulk_bound)

        slabs.append(("bulk", bulk_slab))
        if lower_slab:
            slabs.append(lower_slab)
        if upper_slab:
            slabs.append(upper_slab)

        return slabs

    else:
        return [("bulk", (isl.BasicSet.universe(space)))]
Пример #6
0
def get_slab_decomposition(kernel, iname, sched_index, codegen_state):
    iname_domain = kernel.get_inames_domain(iname)

    if iname_domain.is_empty():
        return ()

    space = iname_domain.space

    lower_incr, upper_incr = kernel.iname_slab_increments.get(iname, (0, 0))
    lower_bulk_bound = None
    upper_bulk_bound = None

    if lower_incr or upper_incr:
        bounds = kernel.get_iname_bounds(iname)

        lower_bound_pw_aff_pieces = bounds.lower_bound_pw_aff.coalesce().get_pieces()
        upper_bound_pw_aff_pieces = bounds.upper_bound_pw_aff.coalesce().get_pieces()

        if len(lower_bound_pw_aff_pieces) > 1:
            raise NotImplementedError("lower bound for slab decomp of '%s' needs "
                    "conditional/has more than one piece" % iname)
        if len(upper_bound_pw_aff_pieces) > 1:
            raise NotImplementedError("upper bound for slab decomp of '%s' needs "
                    "conditional/has more than one piece" % iname)

        (_, lower_bound_aff), = lower_bound_pw_aff_pieces
        (_, upper_bound_aff), = upper_bound_pw_aff_pieces

        from loopy.isl_helpers import iname_rel_aff

        if lower_incr:
            assert lower_incr > 0
            lower_slab = ("initial", isl.BasicSet.universe(space)
                    .add_constraint(
                        isl.Constraint.inequality_from_aff(
                            iname_rel_aff(space,
                                iname, "<", lower_bound_aff+lower_incr))))
            lower_bulk_bound = (
                    isl.Constraint.inequality_from_aff(
                        iname_rel_aff(space,
                            iname, ">=", lower_bound_aff+lower_incr)))
        else:
            lower_slab = None

        if upper_incr:
            assert upper_incr > 0
            upper_slab = ("final", isl.BasicSet.universe(space)
                    .add_constraint(
                        isl.Constraint.inequality_from_aff(
                            iname_rel_aff(space,
                                iname, ">", upper_bound_aff-upper_incr))))
            upper_bulk_bound = (
                    isl.Constraint.inequality_from_aff(
                        iname_rel_aff(space,
                            iname, "<=", upper_bound_aff-upper_incr)))
        else:
            lower_slab = None

        slabs = []

        bulk_slab = isl.BasicSet.universe(space)
        if lower_bulk_bound is not None:
            bulk_slab = bulk_slab.add_constraint(lower_bulk_bound)
        if upper_bulk_bound is not None:
            bulk_slab = bulk_slab.add_constraint(upper_bulk_bound)

        slabs.append(("bulk", bulk_slab))
        if lower_slab:
            slabs.append(lower_slab)
        if upper_slab:
            slabs.append(upper_slab)

        return slabs

    else:
        return [("bulk", (isl.BasicSet.universe(space)))]
Пример #7
0
    def map_Do(self, node):
        scope = self.scope_stack[-1]

        if not node.loopcontrol:
            raise NotImplementedError("unbounded do loop")

        loop_var, loop_bounds = node.loopcontrol.split("=")
        loop_var = loop_var.strip()

        iname_dtype = scope.get_type(loop_var)
        if self.index_dtype is None:
            self.index_dtype = iname_dtype
        else:
            if self.index_dtype != iname_dtype:
                raise LoopyError("type of '%s' (%s) does not agree with prior "
                        "index type (%s)"
                        % (loop_var, iname_dtype, self.index_dtype))

        scope.use_name(loop_var)
        loop_bounds = self.parse_expr(
                node,
                loop_bounds, min_precedence=self.expr_parser._PREC_FUNC_ARGS)

        if len(loop_bounds) == 2:
            start, stop = loop_bounds
            step = 1
        elif len(loop_bounds) == 3:
            start, stop, step = loop_bounds
        else:
            raise RuntimeError("loop bounds not understood: %s"
                    % node.loopcontrol)

        if step != 1:
            raise NotImplementedError(
                    "do loops with non-unit stride")

        if not isinstance(step, int):
            raise TranslationError(
                    "non-constant steps not supported: %s" % step)

        from loopy.symbolic import get_dependencies
        loop_bound_deps = (
                get_dependencies(start)
                | get_dependencies(stop)
                | get_dependencies(step))

        # {{{ find a usable loopy-side loop name

        loopy_loop_var = loop_var
        loop_var_suffix = None
        while True:
            already_used = False
            for iset in scope.index_sets:
                if loopy_loop_var in iset.get_var_dict(dim_type.set):
                    already_used = True
                    break

            if not already_used:
                break

            if loop_var_suffix is None:
                loop_var_suffix = 0

            loop_var_suffix += 1
            loopy_loop_var = loop_var + "_%d" % loop_var_suffix

        loopy_loop_var = intern(loopy_loop_var)

        # }}}

        space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT,
                set=[loopy_loop_var], params=list(loop_bound_deps))

        from loopy.isl_helpers import iname_rel_aff
        from loopy.symbolic import aff_from_expr
        index_set = (
                isl.BasicSet.universe(space)
                .add_constraint(
                    isl.Constraint.inequality_from_aff(
                        iname_rel_aff(space,
                            loopy_loop_var, ">=",
                            aff_from_expr(space, 0))))
                .add_constraint(
                    isl.Constraint.inequality_from_aff(
                        iname_rel_aff(space,
                            loopy_loop_var, "<=",
                            aff_from_expr(space, stop-start)))))

        from pymbolic import var
        scope.active_iname_aliases[loop_var] = \
                var(loopy_loop_var) + start
        scope.active_loopy_inames.add(loopy_loop_var)

        scope.index_sets.append(index_set)

        self.block_nest.append("do")

        for c in node.content:
            self.rec(c)

        del scope.active_iname_aliases[loop_var]
        scope.active_loopy_inames.remove(loopy_loop_var)
Пример #8
0
    def map_Do(self, node):
        scope = self.scope_stack[-1]

        if not node.loopcontrol:
            raise NotImplementedError("unbounded do loop")

        loop_var, loop_bounds = node.loopcontrol.split("=")
        loop_var = loop_var.strip()

        iname_dtype = scope.get_type(loop_var)
        if self.index_dtype is None:
            self.index_dtype = iname_dtype
        else:
            if self.index_dtype != iname_dtype:
                raise LoopyError("type of '%s' (%s) does not agree with prior "
                        "index type (%s)"
                        % (loop_var, iname_dtype, self.index_dtype))

        scope.use_name(loop_var)
        loop_bounds = self.parse_expr(
                node,
                loop_bounds, min_precedence=self.expr_parser._PREC_FUNC_ARGS)

        if len(loop_bounds) == 2:
            start, stop = loop_bounds
            step = 1
        elif len(loop_bounds) == 3:
            start, stop, step = loop_bounds
        else:
            raise RuntimeError("loop bounds not understood: %s"
                    % node.loopcontrol)

        if step != 1:
            raise NotImplementedError(
                    "do loops with non-unit stride")

        if not isinstance(step, int):
            raise TranslationError(
                    "non-constant steps not supported: %s" % step)

        from loopy.symbolic import get_dependencies
        loop_bound_deps = (
                get_dependencies(start)
                | get_dependencies(stop)
                | get_dependencies(step))

        # {{{ find a usable loopy-side loop name

        loopy_loop_var = loop_var
        loop_var_suffix = None
        while True:
            already_used = False
            for iset in scope.index_sets:
                if loopy_loop_var in iset.get_var_dict(dim_type.set):
                    already_used = True
                    break

            if not already_used:
                break

            if loop_var_suffix is None:
                loop_var_suffix = 0

            loop_var_suffix += 1
            loopy_loop_var = loop_var + "_%d" % loop_var_suffix

        # }}}

        space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT,
                set=[loopy_loop_var], params=list(loop_bound_deps))

        from loopy.isl_helpers import iname_rel_aff
        from loopy.symbolic import aff_from_expr
        index_set = (
                isl.BasicSet.universe(space)
                .add_constraint(
                    isl.Constraint.inequality_from_aff(
                        iname_rel_aff(space,
                            loopy_loop_var, ">=",
                            aff_from_expr(space, 0))))
                .add_constraint(
                    isl.Constraint.inequality_from_aff(
                        iname_rel_aff(space,
                            loopy_loop_var, "<=",
                            aff_from_expr(space, stop-start)))))

        from pymbolic import var
        scope.active_iname_aliases[loop_var] = \
                var(loopy_loop_var) + start
        scope.active_loopy_inames.add(loopy_loop_var)

        scope.index_sets.append(index_set)

        self.block_nest.append("do")

        for c in node.content:
            self.rec(c)

        del scope.active_iname_aliases[loop_var]
        scope.active_loopy_inames.remove(loopy_loop_var)
Пример #9
0
def join_inames(kernel, inames, new_iname=None, tag=None, within=None):
    """
    :arg inames: fastest varying last
    :arg within: a stack match as understood by
        :func:`loopy.context_matching.parse_stack_match`.
    """

    # now fastest varying first
    inames = inames[::-1]

    if new_iname is None:
        new_iname = kernel.get_var_name_generator()("_and_".join(inames))

    from loopy.kernel.tools import DomainChanger
    domch = DomainChanger(kernel, frozenset(inames))
    for iname in inames:
        if kernel.get_home_domain_index(iname) != domch.leaf_domain_index:
            raise LoopyError("iname '%s' is not 'at home' in the "
                    "join's leaf domain" % iname)

    new_domain = domch.domain
    new_dim_idx = new_domain.dim(dim_type.set)
    new_domain = new_domain.add_dims(dim_type.set, 1)
    new_domain = new_domain.set_dim_name(dim_type.set, new_dim_idx, new_iname)

    joint_aff = zero = isl.Aff.zero_on_domain(new_domain.space)
    subst_dict = {}
    base_divisor = 1

    from pymbolic import var

    for i, iname in enumerate(inames):
        iname_dt, iname_idx = zero.get_space().get_var_dict()[iname]
        iname_aff = zero.add_coefficient_val(iname_dt, iname_idx, 1)

        joint_aff = joint_aff + base_divisor*iname_aff

        bounds = kernel.get_iname_bounds(iname, constants_only=True)

        from loopy.isl_helpers import (
                static_max_of_pw_aff, static_value_of_pw_aff)
        from loopy.symbolic import pw_aff_to_expr

        length = int(pw_aff_to_expr(
            static_max_of_pw_aff(bounds.size, constants_only=True)))

        try:
            lower_bound_aff = static_value_of_pw_aff(
                    bounds.lower_bound_pw_aff.coalesce(),
                    constants_only=False)
        except Exception as e:
            raise type(e)("while finding lower bound of '%s': " % iname)

        my_val = var(new_iname) // base_divisor
        if i+1 < len(inames):
            my_val %= length
        my_val += pw_aff_to_expr(lower_bound_aff)
        subst_dict[iname] = my_val

        base_divisor *= length

    from loopy.isl_helpers import iname_rel_aff
    new_domain = new_domain.add_constraint(
            isl.Constraint.equality_from_aff(
                iname_rel_aff(new_domain.get_space(), new_iname, "==", joint_aff)))

    for i, iname in enumerate(inames):
        iname_to_dim = new_domain.get_space().get_var_dict()
        iname_dt, iname_idx = iname_to_dim[iname]

        if within is None:
            new_domain = new_domain.project_out(iname_dt, iname_idx, 1)

    def subst_forced_iname_deps(fid):
        result = set()
        for iname in fid:
            if iname in inames:
                result.add(new_iname)
            else:
                result.add(iname)

        return frozenset(result)

    new_insns = [
            insn.copy(
                forced_iname_deps=subst_forced_iname_deps(insn.forced_iname_deps))
            for insn in kernel.instructions]

    kernel = (kernel
            .copy(
                instructions=new_insns,
                domains=domch.get_domains_with(new_domain),
                applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict]
                ))

    from loopy.context_matching import parse_stack_match
    within = parse_stack_match(within)

    from pymbolic.mapper.substitutor import make_subst_func
    rule_mapping_context = SubstitutionRuleMappingContext(
            kernel.substitutions, kernel.get_var_name_generator())
    ijoin = _InameJoiner(rule_mapping_context, within,
            make_subst_func(subst_dict),
            inames, new_iname)

    kernel = rule_mapping_context.finish_kernel(
            ijoin.map_kernel(kernel))

    if tag is not None:
        kernel = tag_inames(kernel, {new_iname: tag})

    return kernel
Пример #10
0
def join_inames(kernel, inames, new_iname=None, tag=None, within=None):
    """
    :arg inames: fastest varying last
    :arg within: a stack match as understood by
        :func:`loopy.match.parse_stack_match`.
    """

    # now fastest varying first
    inames = inames[::-1]

    if new_iname is None:
        new_iname = kernel.get_var_name_generator()("_and_".join(inames))

    from loopy.kernel.tools import DomainChanger
    domch = DomainChanger(kernel, frozenset(inames))
    for iname in inames:
        if kernel.get_home_domain_index(iname) != domch.leaf_domain_index:
            raise LoopyError("iname '%s' is not 'at home' in the "
                             "join's leaf domain" % iname)

    new_domain = domch.domain
    new_dim_idx = new_domain.dim(dim_type.set)
    new_domain = new_domain.add_dims(dim_type.set, 1)
    new_domain = new_domain.set_dim_name(dim_type.set, new_dim_idx, new_iname)

    joint_aff = zero = isl.Aff.zero_on_domain(new_domain.space)
    subst_dict = {}
    base_divisor = 1

    from pymbolic import var

    for i, iname in enumerate(inames):
        iname_dt, iname_idx = zero.get_space().get_var_dict()[iname]
        iname_aff = zero.add_coefficient_val(iname_dt, iname_idx, 1)

        joint_aff = joint_aff + base_divisor * iname_aff

        bounds = kernel.get_iname_bounds(iname, constants_only=True)

        from loopy.isl_helpers import (static_max_of_pw_aff,
                                       static_value_of_pw_aff)
        from loopy.symbolic import pw_aff_to_expr

        length = int(
            pw_aff_to_expr(
                static_max_of_pw_aff(bounds.size, constants_only=True)))

        try:
            lower_bound_aff = static_value_of_pw_aff(
                bounds.lower_bound_pw_aff.coalesce(), constants_only=False)
        except Exception as e:
            raise type(e)("while finding lower bound of '%s': " % iname)

        my_val = var(new_iname) // base_divisor
        if i + 1 < len(inames):
            my_val %= length
        my_val += pw_aff_to_expr(lower_bound_aff)
        subst_dict[iname] = my_val

        base_divisor *= length

    from loopy.isl_helpers import iname_rel_aff
    new_domain = new_domain.add_constraint(
        isl.Constraint.equality_from_aff(
            iname_rel_aff(new_domain.get_space(), new_iname, "==", joint_aff)))

    for i, iname in enumerate(inames):
        iname_to_dim = new_domain.get_space().get_var_dict()
        iname_dt, iname_idx = iname_to_dim[iname]

        if within is None:
            new_domain = new_domain.project_out(iname_dt, iname_idx, 1)

    def subst_within_inames(fid):
        result = set()
        for iname in fid:
            if iname in inames:
                result.add(new_iname)
            else:
                result.add(iname)

        return frozenset(result)

    new_insns = [
        insn.copy(within_inames=subst_within_inames(insn.within_inames))
        for insn in kernel.instructions
    ]

    kernel = (kernel.copy(
        instructions=new_insns,
        domains=domch.get_domains_with(new_domain),
        applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict]))

    from loopy.match import parse_stack_match
    within = parse_stack_match(within)

    from pymbolic.mapper.substitutor import make_subst_func
    rule_mapping_context = SubstitutionRuleMappingContext(
        kernel.substitutions, kernel.get_var_name_generator())
    ijoin = _InameJoiner(rule_mapping_context, within,
                         make_subst_func(subst_dict), inames, new_iname)

    kernel = rule_mapping_context.finish_kernel(ijoin.map_kernel(kernel))

    if tag is not None:
        kernel = tag_inames(kernel, {new_iname: tag})

    return kernel