Exemplo n.º 1
0
def intersect_kernel_with_slab(kernel, slab, iname):
    """Return a copy of *kernel* whose domain for *iname* is cut down by *slab*.

    A ``DomainChanger`` is created for ``(iname, )``. The domain it reports
    as original and *slab* are passed through ``isl.align_two``, the two
    results are intersected with ``&``, and the changer is asked for the
    kernel that carries this intersection.
    """
    from loopy.kernel.tools import DomainChanger

    changer = DomainChanger(kernel, (iname, ))
    domain_before = changer.get_original_domain()

    # NOTE(review): the results are named on the assumption that
    # isl.align_two returns them in argument order (slab first) -- confirm
    # against islpy. The operand order of the intersection is unchanged.
    aligned_slab, aligned_domain = isl.align_two(slab, domain_before)

    return changer.get_kernel_with(aligned_slab & aligned_domain)
Exemplo n.º 2
0
def intersect_kernel_with_slab(kernel, slab, iname):
    """Return a copy of *kernel* whose domain for *iname* is cut down by *slab*.

    A ``DomainChanger`` is created for ``(iname,)``. The domain it reports
    as original and *slab* are passed through ``isl.align_two``, the two
    results are intersected with ``&``, and the changer is asked for the
    kernel that carries this intersection.
    """
    from loopy.kernel.tools import DomainChanger

    changer = DomainChanger(kernel, (iname,))
    domain_before = changer.get_original_domain()

    # Both operands go through align_two before they are intersected.
    aligned_domain, aligned_slab = isl.align_two(domain_before, slab)

    return changer.get_kernel_with(aligned_domain & aligned_slab)
Exemplo n.º 3
0
    def save_or_reload_impl(self,
                            temporary,
                            subkernel,
                            mode,
                            promoted_temporary=lp.auto):
        """Queue one save or reload assignment for *temporary* in *subkernel*.

        Nothing is returned; every result is recorded on ``self``:

        * ``self.kernel`` is replaced by the kernel carrying the domain
          returned by ``augment_domain_for_save_or_reload``;
        * the new ``Assignment`` is appended to ``self.insns_to_insert`` and
          its id is added to ``self.saves_or_reloads_added[temporary]``;
        * updated copies of dependent instructions are stored in
          ``self.insns_to_update``. These are the post-barrier of
          *subkernel* when there is one and, in ``"reload"`` mode, every
          instruction of *subkernel* that reads or writes *temporary*;
        * ``self.updated_temporary_variables`` and
          ``self.updated_iname_to_tag`` are updated from the promoted
          temporary and the returned iname tags.

        :arg temporary: name of the temporary being saved or reloaded.
        :arg subkernel: subkernel handed to the ``self.insn_query`` lookups.
        :arg mode: ``"save"`` or ``"reload"``.
        :arg promoted_temporary: promoted counterpart of *temporary*. If it
            is the ``auto`` sentinel it is obtained from
            ``self.auto_promote_temporary``. If it is (or resolves to)
            *None*, the method returns without doing anything.
        """
        assert mode in ("save", "reload")

        # NOTE(review): the default is spelled ``lp.auto`` while this check
        # uses the bare name ``auto`` -- presumably the same object; confirm
        # against the module's imports.
        if promoted_temporary is auto:
            promoted_temporary = self.auto_promote_temporary(temporary)

        if promoted_temporary is None:
            return

        from loopy.kernel.data import Assignment
        from loopy.kernel.tools import DomainChanger
        from pymbolic.primitives import Subscript, Variable

        def as_access(name, indices=()):
            # A subscript over the index inames when there are any, a bare
            # variable otherwise.
            if len(indices) != 0:
                return Subscript(Variable(name),
                                 tuple(Variable(idx) for idx in indices))
            return Variable(name)

        changed_inames = frozenset(
            self.insn_query.inames_in_subkernel(subkernel)
            | set(promoted_temporary.hw_inames))
        changer = DomainChanger(self.kernel, changed_inames)

        domain, hw_inames, dim_inames, iname_to_tag = (
            self.augment_domain_for_save_or_reload(
                changer.domain, promoted_temporary, mode, subkernel))
        self.kernel = changer.get_kernel_with(domain)

        new_insn_id = self.insn_name_gen(
            "{name}.{mode}".format(name=temporary, mode=mode))

        # The original temporary is indexed with only as many of the new
        # inames as its shape has axes; the promoted one uses all of them
        # after the hardware inames.
        n_orig_axes = len(promoted_temporary.orig_temporary.shape)
        original_access = as_access(temporary, dim_inames[:n_orig_axes])
        promoted_access = as_access(promoted_temporary.name,
                                    hw_inames + dim_inames)

        # Positional operand order for Assignment: promoted first when
        # saving, original first otherwise.
        if mode == "save":
            operands = (promoted_access, original_access)
        else:
            operands = (original_access, promoted_access)

        touching_insns = (
            self.insn_query.insns_reading_or_writing(temporary)
            & self.insn_query.insns_in_subkernel(subkernel))

        # A save depends on the instructions touching the temporary; a
        # reload makes those instructions depend on it instead.
        if mode == "save":
            depends_on, update_deps = touching_insns, frozenset()
        elif mode == "reload":
            depends_on, update_deps = frozenset(), touching_insns

        pre_barrier, post_barrier = \
            self.insn_query.pre_and_post_barriers(subkernel)

        if pre_barrier is not None:
            depends_on |= {pre_barrier}

        if post_barrier is not None:
            update_deps |= {post_barrier}

        # Create the load / store instruction.
        within_inames = (self.insn_query.inames_in_subkernel(subkernel)
                         | frozenset(hw_inames + dim_inames))
        new_insn = Assignment(
            *operands,
            id=new_insn_id,
            within_inames=within_inames,
            within_inames_is_final=True,
            depends_on=depends_on,
            boostable=False,
            boostable_into=frozenset())

        self.saves_or_reloads_added.setdefault(temporary, set()).add(
            new_insn_id)
        self.insns_to_insert.append(new_insn)

        # Prefer an already-updated copy of a dependent instruction over
        # the one stored in the kernel, so earlier updates are kept.
        extra_dep = frozenset([new_insn_id])
        for dependent_id in update_deps:
            fallback = self.kernel.id_to_insn[dependent_id]
            dependent = self.insns_to_update.get(dependent_id, fallback)
            self.insns_to_update[dependent_id] = dependent.copy(
                depends_on=dependent.depends_on | extra_dep)

        self.updated_temporary_variables[promoted_temporary.name] = \
            promoted_temporary.as_variable()

        self.updated_iname_to_tag.update(iname_to_tag)
Exemplo n.º 4
0
        def insert_loads_or_spills(tvals, mode):
            """Emit a load or spill for each temporary in *tvals*.

            For every name in *tvals* this

            * augments the kernel domain via
              ``augment_domain_for_temporary_promotion`` and merges the
              returned iname tags into ``new_iname_to_tag``,
            * appends an ``Assignment`` between the temporary and its
              promoted counterpart to ``new_instructions``,
            * extends ``subkernel_prolog`` (``"load"``) or
              ``subkernel_epilog`` (``"spill"``) with the loops over the new
              inames around a ``RunInstruction`` for that assignment.

            If any promoted temporary's original is local, a local barrier
            is appended to the prolog (load) or inserted at the front of the
            epilog (spill).

            :arg tvals: iterable of temporary names, each a key of
                ``new_temporaries``.
            :arg mode: ``"load"`` or ``"spill"``.
            :returns: the kernel with the domain changes of all *tvals*
                applied.
            """
            assert mode in ["load", "spill"]

            from loopy.kernel.data import Assignment
            from loopy.kernel.tools import DomainChanger
            from pymbolic.primitives import Subscript, Variable

            def subscript_or_var(agg, subscript):
                # A bare variable when there are no index inames.
                if len(subscript) == 0:
                    return Variable(agg)
                else:
                    return Subscript(
                        Variable(agg),
                        tuple(map(Variable, subscript)))

            local_temporaries = set()

            code_block = \
                subkernel_prolog if mode == "load" else subkernel_epilog

            new_kernel = kernel

            for tval in tvals:
                promoted = new_temporaries[tval]

                # Build the changer from new_kernel, not the captured
                # kernel: get_kernel_with() derives its result from the
                # changer's kernel, so using the untouched kernel would
                # discard the domain augmentation of every earlier tval.
                dchg = DomainChanger(new_kernel,
                    frozenset(sched_item.extra_inames + promoted.hw_inames))
                domain = dchg.domain

                domain, hw_inames, dim_inames, itt = \
                    augment_domain_for_temporary_promotion(
                        new_kernel, domain, promoted, mode,
                        name_gen)
                new_iname_to_tag.update(itt)

                new_kernel = dchg.get_kernel_with(domain)

                # Add the load / spill instruction.
                insn_id = name_gen("{name}.{mode}".format(name=tval, mode=mode))

                args = (
                    subscript_or_var(
                        tval, dim_inames),
                    subscript_or_var(
                        promoted.name, hw_inames + dim_inames))

                # A spill passes the two operands in the opposite order.
                if mode == "spill":
                    args = args[::-1]

                new_insn = Assignment(*args, id=insn_id)

                new_instructions.append(new_insn)

                # Wrap the instruction in the loops over the new inames,
                # leaving them in the reverse of the order they are entered.
                loop_begin = [EnterLoop(iname=iname) for iname in dim_inames]
                loop_end = [
                    LeaveLoop(iname=iname) for iname in reversed(dim_inames)]
                code_block.extend(
                    loop_begin +
                    [RunInstruction(insn_id=insn_id)] +
                    loop_end)
                if promoted.orig_temporary.is_local:
                    local_temporaries.add(promoted.name)

            # After loading / before spilling local temporaries, we need to
            # insert a barrier.
            if local_temporaries:
                if mode == "load":
                    subkernel_prolog.append(
                        Barrier(kind="local",
                                comment="for loads of {0}".format(
                                    ", ".join(sorted(local_temporaries)))))
                else:
                    subkernel_epilog.insert(0,
                        Barrier(kind="local",
                                comment="for spills of {0}".format(
                                    ", ".join(sorted(local_temporaries)))))
            return new_kernel
Exemplo n.º 5
0
        def insert_loads_or_spills(tvals, mode):
            """Emit a load or spill for each temporary in *tvals*.

            For every name in *tvals* this

            * augments the kernel domain via
              ``augment_domain_for_temporary_promotion`` and merges the
              returned iname tags into ``new_iname_to_tag``,
            * appends an ``Assignment`` between the temporary and its
              promoted counterpart to ``new_instructions``,
            * extends ``subkernel_prolog`` (``"load"``) or
              ``subkernel_epilog`` (``"spill"``) with the loops over the new
              inames around a ``RunInstruction`` for that assignment.

            If any promoted temporary's original is local, a local barrier
            is appended to the prolog (load) or inserted at the front of the
            epilog (spill).

            :arg tvals: iterable of temporary names, each a key of
                ``new_temporaries``.
            :arg mode: ``"load"`` or ``"spill"``.
            :returns: the kernel with the domain changes of all *tvals*
                applied.
            """
            assert mode in ["load", "spill"]

            from loopy.kernel.data import Assignment
            from loopy.kernel.tools import DomainChanger
            from pymbolic.primitives import Subscript, Variable

            def subscript_or_var(agg, subscript):
                # A bare variable when there are no index inames.
                if len(subscript) == 0:
                    return Variable(agg)
                else:
                    return Subscript(Variable(agg),
                                     tuple(map(Variable, subscript)))

            local_temporaries = set()

            code_block = \
                subkernel_prolog if mode == "load" else subkernel_epilog

            new_kernel = kernel

            for tval in tvals:
                promoted = new_temporaries[tval]

                # Build the changer from new_kernel, not the captured
                # kernel: get_kernel_with() derives its result from the
                # changer's kernel, so using the untouched kernel would
                # discard the domain augmentation of every earlier tval.
                dchg = DomainChanger(
                    new_kernel,
                    frozenset(sched_item.extra_inames + promoted.hw_inames))
                domain = dchg.domain

                domain, hw_inames, dim_inames, itt = \
                    augment_domain_for_temporary_promotion(
                        new_kernel, domain, promoted, mode,
                        name_gen)
                new_iname_to_tag.update(itt)

                new_kernel = dchg.get_kernel_with(domain)

                # Add the load / spill instruction.
                insn_id = name_gen("{name}.{mode}".format(name=tval,
                                                          mode=mode))

                args = (subscript_or_var(tval, dim_inames),
                        subscript_or_var(promoted.name,
                                         hw_inames + dim_inames))

                # A spill passes the two operands in the opposite order.
                if mode == "spill":
                    args = args[::-1]

                new_insn = Assignment(*args, id=insn_id)

                new_instructions.append(new_insn)

                # Wrap the instruction in the loops over the new inames,
                # leaving them in the reverse of the order they are entered.
                loop_begin = [EnterLoop(iname=iname) for iname in dim_inames]
                loop_end = [
                    LeaveLoop(iname=iname) for iname in reversed(dim_inames)]
                code_block.extend(loop_begin +
                                  [RunInstruction(insn_id=insn_id)] + loop_end)
                if promoted.orig_temporary.is_local:
                    local_temporaries.add(promoted.name)

            # After loading / before spilling local temporaries, we need to
            # insert a barrier.
            if local_temporaries:
                if mode == "load":
                    subkernel_prolog.append(
                        Barrier(kind="local",
                                comment="for loads of {0}".format(", ".join(
                                    sorted(local_temporaries)))))
                else:
                    subkernel_epilog.insert(
                        0,
                        Barrier(kind="local",
                                comment="for spills of {0}".format(", ".join(
                                    sorted(local_temporaries)))))
            return new_kernel