示例#1
0
 def __init__(self, kernel):
     """
     Remember *kernel*, set up the query and name-generation helpers
     derived from it, and start with empty update buffers.
     """
     self.kernel = kernel
     self.insn_query = InstructionQuery(kernel)

     # Generators obtained from the kernel for fresh variable names and
     # fresh instruction ids.
     self.var_name_gen = kernel.get_var_name_generator()
     self.insn_name_gen = kernel.get_instruction_id_generator()

     # Bookkeeping for updates to the kernel: one list of instructions to
     # insert, and dictionaries for everything else.
     self.insns_to_insert = []
     self.insns_to_update = {}
     self.saves_or_reloads_added = {}
     self.updated_temporary_variables = {}
     self.updated_iname_to_tag = {}
     self.extra_args_to_add = {}
示例#2
0
文件: check.py 项目: arghdos/loopy
def check_that_temporaries_are_defined_in_subkernels_where_used(kernel):
    """
    Check that temporaries read in a subkernel have a definition there.

    For every subkernel, the temporaries that are read but not written in
    that subkernel are inspected:

    * temporaries with an initializer are accepted;
    * temporaries with ``base_storage`` are accepted if some temporary
      written in the same subkernel has the same ``base_storage``;
    * any remaining temporary with private or local scope is rejected.

    :raises loopy.diagnostic.MissingDefinitionError: if a temporary is
        rejected by the rules above.
    """
    from loopy.kernel.data import temp_var_scope
    from loopy.schedule.tools import InstructionQuery

    def report_missing(message):
        # Imported on demand: only the failure path needs this name.
        from loopy.diagnostic import MissingDefinitionError
        raise MissingDefinitionError(message)

    query = InstructionQuery(kernel)
    temporaries = kernel.temporary_variables

    for subkernel in query.subkernels():
        written = query.temporaries_written_in_subkernel(subkernel)

        # Base storages that get written through any temporary in this
        # subkernel.
        storage_with_definition = set()
        for name in written:
            storage = temporaries[name].base_storage
            if storage is not None:
                storage_with_definition.add(storage)

        read_but_not_written = (
            query.temporaries_read_in_subkernel(subkernel) - written)

        for name in read_but_not_written:
            tvar = temporaries[name]

            if tvar.initializer is not None:
                continue

            # For aliased temporaries, a definition of any alias suffices.
            if tvar.base_storage is not None:
                if tvar.base_storage not in storage_with_definition:
                    report_missing(
                        "temporary variable '%s' gets used "
                        "in subkernel '%s' and neither it nor its aliases have a "
                        "definition" % (name, subkernel))
                continue

            if tvar.scope in (temp_var_scope.PRIVATE, temp_var_scope.LOCAL):
                report_missing(
                    "temporary variable '%s' gets used in "
                    "subkernel '%s' without a definition (maybe you forgot to call "
                    "loopy.save_and_reload_temporaries?)" %
                    (name, subkernel))
示例#3
0
def save_and_reload_temporaries(knl):
    """
    Insert save and reload instructions for temporary variables that are
    live across kernel calls.

    Schedule segments of the form::

        t = <...>
        <return followed by call>
        <...> = t

    are transformed into::

        t = <...>
        t_save_slot = t
        <return followed by call>
        t = t_save_slot
        <...> = t

    where `t_save_slot` is a newly-created global temporary variable.

    :returns: The resulting kernel
    """
    liveness = LivenessAnalysis(knl)
    saver = TemporarySaver(knl)
    insn_query = InstructionQuery(knl)

    last_idx = len(knl.schedule) - 1

    for sched_idx, sched_item in enumerate(knl.schedule):

        if isinstance(sched_item, CallKernel):
            subkernel = sched_item.kernel_name

            # Nothing is live at kernel entry. Elsewhere, both read and
            # written temporaries are candidates: a written temporary that
            # is live-out must be reloaded too, because the write may be
            # partial.
            candidates = (
                set()
                if sched_idx == 0
                else insn_query.temporaries_read_or_written_in_subkernel(
                    subkernel))

            for temporary in liveness[sched_idx].live_out & candidates:
                logger.info("reloading {0} at entry of {1}".format(
                    temporary, subkernel))
                saver.reload(temporary, subkernel)

        elif isinstance(sched_item, ReturnFromKernel):
            subkernel = sched_item.kernel_name

            # Nothing is live at kernel exit. Elsewhere, only temporaries
            # written in the subkernel are candidates for saving.
            candidates = (
                set()
                if sched_idx == last_idx
                else insn_query.temporaries_written_in_subkernel(subkernel))

            for temporary in liveness[sched_idx].live_in & candidates:
                logger.info("saving {0} before return of {1}".format(
                    temporary, subkernel))
                saver.save(temporary, subkernel)

    return saver.finish()
示例#4
0
class TemporarySaver(object):
    """
    Collects the kernel modifications needed to save and reload
    temporaries around subkernel boundaries.

    Calls to :meth:`save` and :meth:`reload` queue up new instructions,
    dependency updates, promoted temporaries and iname tags (and replace
    ``self.kernel`` with a kernel whose domain has the extra inames).
    :meth:`finish` applies everything that was queued and returns the
    resulting kernel.
    """

    class PromotedTemporary(Record):
        """
        Describes the global temporary that backs an original temporary
        while it is saved and reloaded.

        .. attribute:: name

            The name of the new temporary.

        .. attribute:: orig_temporary

            The original temporary variable object.

        .. attribute:: hw_inames

            The common list of hw axes that define the original object.

        .. attribute:: hw_dims

            A sequence of expressions, placed at the front of the shape
            of the promoted temporary value, corresponding to
            hardware dimensions

        .. attribute:: non_hw_dims

            A sequence of expressions, placed after :attr:`hw_dims` in
            the shape of the promoted temporary value, corresponding to
            non-hardware dimensions
        """
        @memoize_method
        def as_variable(self):
            """
            Return a global-scope temporary variable with this object's
            name, the original temporary's dtype and :attr:`new_shape`.
            """
            temporary = self.orig_temporary
            from loopy.kernel.data import TemporaryVariable
            return TemporaryVariable(name=self.name,
                                     dtype=temporary.dtype,
                                     scope=temp_var_scope.GLOBAL,
                                     shape=self.new_shape)

        @property
        def new_shape(self):
            """Shape of the promoted temporary: hardware dims first."""
            return self.hw_dims + self.non_hw_dims

    def __init__(self, kernel):
        """
        :arg kernel: the kernel to which saves and reloads will be added.
        """
        self.kernel = kernel
        self.insn_query = InstructionQuery(kernel)
        self.var_name_gen = kernel.get_var_name_generator()
        self.insn_name_gen = kernel.get_instruction_id_generator()
        # These fields keep track of updates to the kernel.
        self.insns_to_insert = []
        self.insns_to_update = {}
        # NOTE(review): extra_args_to_add is initialized here but not read
        # anywhere else in this class.
        self.extra_args_to_add = {}
        self.updated_iname_to_tag = {}
        self.updated_temporary_variables = {}
        # Maps a temporary's name to the ids of the save/reload
        # instructions that were added for it.
        self.saves_or_reloads_added = {}

    @memoize_method
    def auto_promote_temporary(self, temporary_name):
        """
        Work out the global backing storage for the temporary named
        *temporary_name*.

        :returns: a :class:`PromotedTemporary`, or *None* if the temporary
            is already global or has an initializer (in which case no
            save/reload is needed).
        :raises ValueError: if the temporary has ``base_storage``.
        """
        temporary = self.kernel.temporary_variables[temporary_name]

        if temporary.scope == temp_var_scope.GLOBAL:
            # Nothing to be done for global temporaries (I hope)
            return None

        if temporary.initializer is not None:
            # Temporaries with initializers do not need saving/reloading - the
            # code generation takes care of emitting the initializers.
            assert temporary.read_only
            return None

        if temporary.base_storage is not None:
            raise ValueError(
                "Cannot promote temporaries with base_storage to global")

        # `hw_inames`: The set of hw-parallel tagged inames that this temporary
        # is associated with. This is used for determining the shape of the
        # global storage needed for saving and restoring the temporary across
        # kernel calls.
        #
        # TODO: Make a policy decision about which dimensions to use. Currently,
        # the code looks at each instruction that defines or uses the temporary,
        # and takes the common set of hw-parallel tagged inames associated with
        # these instructions.
        #
        # Furthermore, in the case of local temporaries, inames that are tagged
        # hw-local do not contribute to the global storage shape.
        hw_inames = self.insn_query.common_hw_inames(
            self.insn_query.insns_reading_or_writing(temporary.name))

        # We want hw_inames to be arranged according to the order:
        #    g.0 < g.1 < ... < l.0 < l.1 < ...
        # Sorting lexicographically accomplishes this.
        hw_inames = sorted(
            hw_inames, key=lambda iname: str(self.kernel.iname_to_tag[iname]))

        # Calculate the sizes of the dimensions that get added in front for
        # the global storage of the temporary.
        hw_dims = []

        backing_hw_inames = []

        from loopy.kernel.data import LocalIndexTag
        from loopy.isl_helpers import static_max_of_pw_aff
        from loopy.symbolic import aff_to_expr

        for iname in hw_inames:
            tag = self.kernel.iname_to_tag[iname]
            is_local_iname = isinstance(tag, LocalIndexTag)
            if is_local_iname and temporary.scope == temp_var_scope.LOCAL:
                # Restrict shape to that of group inames for locals.
                continue
            backing_hw_inames.append(iname)
            hw_dims.append(
                aff_to_expr(
                    static_max_of_pw_aff(
                        self.kernel.get_iname_bounds(iname).size, False)))

        non_hw_dims = temporary.shape

        if len(non_hw_dims) == 0 and len(hw_dims) == 0:
            # Scalar not in hardware: ensure at least one dimension.
            non_hw_dims = (1, )

        backing_temporary = self.PromotedTemporary(
            name=self.var_name_gen(temporary.name + "_save_slot"),
            orig_temporary=temporary,
            hw_dims=tuple(hw_dims),
            non_hw_dims=non_hw_dims,
            hw_inames=backing_hw_inames)

        return backing_temporary

    def save_or_reload_impl(self,
                            temporary,
                            subkernel,
                            mode,
                            promoted_temporary=lp.auto):
        """
        Queue an instruction that copies *temporary* to (``"save"``) or from
        (``"reload"``) its promoted global counterpart in *subkernel*.

        :arg temporary: name of the temporary to save or reload.
        :arg subkernel: name of the subkernel receiving the instruction.
        :arg mode: either ``"save"`` or ``"reload"``.
        :arg promoted_temporary: the :class:`PromotedTemporary` to use. With
            the default, it is determined by
            :meth:`auto_promote_temporary`. If it is (or turns out to be)
            *None*, nothing is done.
        """
        assert mode in ("save", "reload")

        # Compare against the same sentinel object that the signature uses
        # as its default.
        if promoted_temporary is lp.auto:
            promoted_temporary = self.auto_promote_temporary(temporary)

        if promoted_temporary is None:
            return

        from loopy.kernel.tools import DomainChanger
        dchg = DomainChanger(
            self.kernel,
            frozenset(
                self.insn_query.inames_in_subkernel(subkernel)
                | set(promoted_temporary.hw_inames)))

        domain, hw_inames, dim_inames, iname_to_tag = \
            self.augment_domain_for_save_or_reload(
                dchg.domain, promoted_temporary, mode, subkernel)

        self.kernel = dchg.get_kernel_with(domain)

        save_or_load_insn_id = self.insn_name_gen("{name}.{mode}".format(
            name=temporary, mode=mode))

        def subscript_or_var(agg, subscript=()):
            # A bare variable when there are no indices, otherwise a
            # subscript whose indices are the given inames.
            from pymbolic.primitives import Subscript, Variable
            if len(subscript) == 0:
                return Variable(agg)
            else:
                return Subscript(Variable(agg), tuple(map(Variable,
                                                          subscript)))

        # The promoted temporary may have more non-hardware dimensions than
        # the original (a scalar is padded to shape (1,)), so the original
        # is only indexed by as many inames as it has dimensions.
        dim_inames_trunc = dim_inames[:len(promoted_temporary.orig_temporary.
                                           shape)]

        # (assignee, expression) for a reload; swapped below for a save.
        args = (subscript_or_var(temporary, dim_inames_trunc),
                subscript_or_var(promoted_temporary.name,
                                 hw_inames + dim_inames))

        if mode == "save":
            args = reversed(args)

        accessing_insns_in_subkernel = (
            self.insn_query.insns_reading_or_writing(temporary)
            & self.insn_query.insns_in_subkernel(subkernel))

        # A save runs after every access in the subkernel; a reload runs
        # before every access (those instructions get a dependency on it).
        if mode == "save":
            depends_on = accessing_insns_in_subkernel
            update_deps = frozenset()
        elif mode == "reload":
            depends_on = frozenset()
            update_deps = accessing_insns_in_subkernel

        pre_barrier, post_barrier = self.insn_query.pre_and_post_barriers(
            subkernel)

        if pre_barrier is not None:
            depends_on |= frozenset([pre_barrier])

        if post_barrier is not None:
            update_deps |= frozenset([post_barrier])

        # Create the load / store instruction.
        from loopy.kernel.data import Assignment
        save_or_load_insn = Assignment(
            *args,
            id=save_or_load_insn_id,
            within_inames=(self.insn_query.inames_in_subkernel(subkernel)
                           | frozenset(hw_inames + dim_inames)),
            within_inames_is_final=True,
            depends_on=depends_on,
            boostable=False,
            boostable_into=frozenset())

        if temporary not in self.saves_or_reloads_added:
            self.saves_or_reloads_added[temporary] = set()
        self.saves_or_reloads_added[temporary].add(save_or_load_insn_id)

        self.insns_to_insert.append(save_or_load_insn)

        for insn_id in update_deps:
            # Start from an already-updated copy if there is one, so that
            # earlier dependency additions are not lost.
            insn = self.insns_to_update.get(insn_id,
                                            self.kernel.id_to_insn[insn_id])
            self.insns_to_update[insn_id] = insn.copy(
                depends_on=insn.depends_on | frozenset([save_or_load_insn_id]))

        self.updated_temporary_variables[promoted_temporary.name] = \
            promoted_temporary.as_variable()

        self.updated_iname_to_tag.update(iname_to_tag)

    @memoize_method
    def finish(self):
        """
        Apply all queued updates and return the resulting kernel.

        :returns: a copy of ``self.kernel`` with the updated and inserted
            instructions, iname tags and temporaries, passed through
            ``assign_automatic_axes``.
        """
        insns_to_insert = {insn.id: insn for insn in self.insns_to_insert}

        # Add global no_sync_with between any added reloads and saves
        for added_insns in self.saves_or_reloads_added.values():
            for insn_id in added_insns:
                insn = insns_to_insert[insn_id]
                insns_to_insert[insn_id] = insn.copy(no_sync_with=frozenset(
                    (added_insn, "global") for added_insn in added_insns))

        # Existing instructions keep their position (replaced by their
        # updated version where there is one); new instructions follow,
        # ordered by id.
        new_instructions = [
            self.insns_to_update.get(orig_insn.id, orig_insn)
            for orig_insn in self.kernel.instructions]
        new_instructions.extend(
            sorted(insns_to_insert.values(), key=lambda insn: insn.id))

        self.updated_iname_to_tag.update(self.kernel.iname_to_tag)
        self.updated_temporary_variables.update(
            self.kernel.temporary_variables)

        kernel = self.kernel.copy(
            instructions=new_instructions,
            iname_to_tag=self.updated_iname_to_tag,
            temporary_variables=self.updated_temporary_variables,
            overridden_get_grid_sizes_for_insn_ids=None)

        from loopy.kernel.tools import assign_automatic_axes
        return assign_automatic_axes(kernel)

    def save(self, temporary, subkernel):
        """Queue a save of *temporary* in *subkernel*."""
        self.save_or_reload_impl(temporary, subkernel, "save")

    def reload(self, temporary, subkernel):
        """Queue a reload of *temporary* in *subkernel*."""
        self.save_or_reload_impl(temporary, subkernel, "reload")

    def augment_domain_for_save_or_reload(self, domain, promoted_temporary,
                                          mode, subkernel):
        """
        Add new axes to the domain corresponding to the dimensions of
        `promoted_temporary`. These axes will be used in the save/
        reload stage.

        :returns: a tuple ``(domain, hw_inames, dim_inames, iname_to_tag)``
            of the augmented domain, the names of the duplicated hardware
            inames, the names of the new per-dimension inames, and the
            tags for the newly added inames.
        """
        assert mode in ("save", "reload")
        import islpy as isl
        from loopy.kernel.data import AutoFitLocalIndexTag
        from loopy.symbolic import aff_from_expr

        orig_temporary = promoted_temporary.orig_temporary
        orig_dim = domain.dim(isl.dim_type.set)

        # Tags for newly added inames
        iname_to_tag = {}

        # FIXME: Restrict size of new inames to access footprint.

        # Add dimension-dependent inames.
        dim_inames = []
        domain = domain.add(isl.dim_type.set,
                            len(promoted_temporary.non_hw_dims))

        for dim_idx, dim_size in enumerate(promoted_temporary.non_hw_dims):
            # NOTE(review): iname names are drawn from the instruction id
            # generator rather than from var_name_gen - confirm that this
            # is intended.
            new_iname = self.insn_name_gen(
                "{name}_{mode}_axis_{dim}_{sk}".format(
                    name=orig_temporary.name,
                    mode=mode,
                    dim=dim_idx,
                    sk=subkernel))
            domain = domain.set_dim_name(isl.dim_type.set, orig_dim + dim_idx,
                                         new_iname)

            if orig_temporary.is_local:
                # If the temporary has local scope, then loads / stores can
                # be done in parallel.
                iname_to_tag[new_iname] = AutoFitLocalIndexTag()

            dim_inames.append(new_iname)

            # Add size information: 0 <= new_iname < dim_size.
            aff = isl.affs_from_space(domain.space)
            domain &= aff[0].le_set(aff[new_iname])
            domain &= aff[new_iname].lt_set(
                aff_from_expr(domain.space, dim_size))

        # FIXME: Use promoted_temporary.hw_inames
        hw_inames = []

        # Add hardware inames duplicates.
        for t_idx, hw_iname in enumerate(promoted_temporary.hw_inames):
            new_iname = self.insn_name_gen(
                "{name}_{mode}_hw_dim_{dim}_{sk}".format(
                    name=orig_temporary.name,
                    mode=mode,
                    dim=t_idx,
                    sk=subkernel))
            hw_inames.append(new_iname)
            # The duplicate carries the same tag as the iname it copies.
            iname_to_tag[new_iname] = self.kernel.iname_to_tag[hw_iname]

        from loopy.isl_helpers import duplicate_axes
        domain = duplicate_axes(domain, promoted_temporary.hw_inames,
                                hw_inames)

        # The operations on the domain above return a Set object, but the
        # underlying domain should be expressible as a single BasicSet.
        domain_list = domain.get_basic_set_list()
        assert domain_list.n_basic_set() == 1
        domain = domain_list.get_basic_set(0)
        return domain, hw_inames, dim_inames, iname_to_tag