Exemplo n.º 1
0
    def finish(self):
        """Assemble the kernel that reflects every change recorded on *self*.

        The steps, in order:

        * instructions of ``self.kernel`` whose id is a key of
          ``self.insns_to_update`` are swapped for the stored replacement;
        * the instructions in ``self.insns_to_insert`` are appended, sorted
          by id;
        * ``self.new_subdomain`` is appended to the domains if it has at
          least one set dimension;
        * within each subkernel, a ``"global"`` nosync is added from every
          newly added reload to every newly added save of the same temporary.

        :returns: the result of ``assign_automatic_axes`` on that kernel.
        """
        from itertools import product

        import islpy as isl
        from loopy.kernel.tools import assign_automatic_axes, get_subkernels

        # Going through a dict keyed on id means a repeated id keeps only the
        # last instruction carrying it.
        pending_by_id = {insn.id: insn for insn in self.insns_to_insert}

        replacements = self.insns_to_update
        merged_insns = [
            replacements[insn.id] if insn.id in replacements else insn
            for insn in self.kernel.instructions]
        merged_insns += sorted(
            pending_by_id.values(), key=lambda insn: insn.id)

        # NOTE(review): if these are plain dicts, update() makes the kernel's
        # existing entries win over same-keyed entries recorded here --
        # confirm that this precedence is intended.
        self.updated_iname_to_tag.update(self.kernel.iname_to_tag)
        self.updated_temporary_variables.update(
            self.kernel.temporary_variables)

        domains = list(self.kernel.domains)
        if self.new_subdomain.dim(isl.dim_type.set) > 0:
            domains.append(self.new_subdomain)

        result = self.kernel.copy(
            domains=domains,
            instructions=merged_insns,
            iname_to_tag=self.updated_iname_to_tag,
            temporary_variables=self.updated_temporary_variables,
            overridden_get_grid_sizes_for_insn_ids=None)

        # Add nosync directives to any saves or reloads that were added with a
        # potential dependency chain.
        for subkernel in get_subkernels(result):
            added_here = self.subkernel_to_newly_added_insn_ids[subkernel]

            for temporary in self.temporary_to_reload_ids:
                reload_ids = (
                    added_here & self.temporary_to_reload_ids[temporary])
                save_ids = added_here & self.temporary_to_save_ids[temporary]

                for source, sink in product(reload_ids, save_ids):
                    result = lp.add_nosync(result, "global", source, sink)

        return assign_automatic_axes(result)
Exemplo n.º 2
0
    def finish(self):
        """Build and return the kernel with all recorded modifications applied.

        Instructions with an entry in ``self.insns_to_update`` are replaced,
        the instructions from ``self.insns_to_insert`` are appended in id
        order, ``self.new_subdomain`` joins the domains when it has any set
        dimensions, and ``"global"`` nosync directives are added between
        newly added reloads and saves of the same temporary inside each
        subkernel. The assembled kernel is passed through
        ``assign_automatic_axes`` and that result is returned.
        """
        from itertools import product

        import islpy as isl
        from loopy.kernel.tools import assign_automatic_axes, get_subkernels

        # Keyed by id: of several inserted instructions sharing an id, only
        # the last one survives.
        inserted = {insn.id: insn for insn in self.insns_to_insert}
        inserted_in_id_order = sorted(
            inserted.values(), key=lambda insn: insn.id)

        updated = self.insns_to_update
        all_insns = [
            updated[insn.id] if insn.id in updated else insn
            for insn in self.kernel.instructions]
        all_insns.extend(inserted_in_id_order)

        # NOTE(review): assuming dict semantics, entries already present on
        # the kernel overwrite same-keyed entries recorded on self -- confirm.
        self.updated_iname_to_tags.update(self.kernel.iname_to_tags)
        self.updated_temporary_variables.update(
            self.kernel.temporary_variables)

        all_domains = list(self.kernel.domains)
        if self.new_subdomain.dim(isl.dim_type.set) > 0:
            all_domains.append(self.new_subdomain)

        new_kernel = self.kernel.copy(
            domains=all_domains,
            instructions=all_insns,
            iname_to_tags=self.updated_iname_to_tags,
            temporary_variables=self.updated_temporary_variables,
            overridden_get_grid_sizes_for_insn_ids=None)

        # Add nosync directives to any saves or reloads that were added with a
        # potential dependency chain.
        for subkernel in get_subkernels(new_kernel):
            new_ids = self.subkernel_to_newly_added_insn_ids[subkernel]

            for temporary in self.temporary_to_reload_ids:
                new_reloads = new_ids & self.temporary_to_reload_ids[temporary]
                new_saves = new_ids & self.temporary_to_save_ids[temporary]

                for source, sink in product(new_reloads, new_saves):
                    new_kernel = lp.add_nosync(
                        new_kernel, "global", source, sink)

        return assign_automatic_axes(new_kernel)
Exemplo n.º 3
0
def check_that_temporaries_are_defined_in_subkernels_where_used(kernel):
    """Verify that each subkernel has a definition for the temporaries it reads.

    For every subkernel of *kernel*, each temporary that is read but not
    written there is examined:

    * temporaries with an initializer are accepted;
    * temporaries with a ``base_storage`` are accepted only if some temporary
      written in the same subkernel has that same ``base_storage``;
    * any other temporary is rejected if its address space is ``PRIVATE`` or
      ``LOCAL``.

    :raises loopy.diagnostic.MissingDefinitionError: for the first rejected
        temporary encountered.
    """
    from loopy.diagnostic import MissingDefinitionError
    from loopy.kernel.data import AddressSpace
    from loopy.kernel.tools import get_subkernels
    from loopy.schedule.tools import (temporaries_read_in_subkernel,
                                      temporaries_written_in_subkernel)

    spaces_needing_definition = (AddressSpace.PRIVATE, AddressSpace.LOCAL)

    for subkernel in get_subkernels(kernel):
        written = temporaries_written_in_subkernel(kernel, subkernel)

        # Base storages that receive a write through any temporary here.
        written_storages = (
            kernel.temporary_variables[name].base_storage for name in written)
        defined_base_storage = {
            storage for storage in written_storages if storage is not None}

        read_only = temporaries_read_in_subkernel(kernel, subkernel) - written

        for temporary in read_only:
            tval = kernel.temporary_variables[temporary]

            if tval.initializer is not None:
                continue

            # For aliased temporaries, check if there is an aliased definition.
            if tval.base_storage is not None:
                if tval.base_storage in defined_base_storage:
                    continue

                raise MissingDefinitionError(
                    "temporary variable '%s' gets "
                    "used in subkernel '%s' and neither it nor its "
                    "aliases have a definition" % (temporary, subkernel))

            if tval.address_space not in spaces_needing_definition:
                continue

            raise MissingDefinitionError(
                "temporary variable '%s' gets used "
                "in subkernel '%s' without a definition (maybe you forgot "
                "to call loopy.save_and_reload_temporaries?)" %
                (temporary, subkernel))
Exemplo n.º 4
0
def check_that_temporaries_are_defined_in_subkernels_where_used(kernel):
    """Raise if a subkernel reads a temporary it has no definition for.

    Only temporaries that a subkernel reads without also writing are
    checked. One with an initializer passes. One with a ``base_storage``
    passes exactly when a temporary written in that subkernel has the same
    ``base_storage``. Of the remaining ones, those in the ``PRIVATE`` or
    ``LOCAL`` address space fail.

    :raises loopy.diagnostic.MissingDefinitionError: on the first failing
        temporary.
    """
    from loopy.diagnostic import MissingDefinitionError
    from loopy.kernel.data import AddressSpace
    from loopy.kernel.tools import get_subkernels
    from loopy.schedule.tools import (
            temporaries_read_in_subkernel, temporaries_written_in_subkernel)

    temporaries = kernel.temporary_variables

    for subkernel in get_subkernels(kernel):
        written_here = temporaries_written_in_subkernel(kernel, subkernel)

        # Collect the base storages that are written via some temporary.
        defined_base_storage = set()
        for written_name in written_here:
            storage = temporaries[written_name].base_storage
            if storage is not None:
                defined_base_storage.add(storage)

        read_here = temporaries_read_in_subkernel(kernel, subkernel)

        for temporary in read_here - written_here:
            tval = temporaries[temporary]
            if tval.initializer is not None:
                continue

            has_base_storage = tval.base_storage is not None

            # For aliased temporaries, check if there is an aliased definition.
            if has_base_storage and (
                    tval.base_storage not in defined_base_storage):
                raise MissingDefinitionError("temporary variable '%s' gets "
                        "used in subkernel '%s' and neither it nor its "
                        "aliases have a definition" % (temporary, subkernel))

            if has_base_storage:
                continue

            if tval.address_space in (AddressSpace.PRIVATE, AddressSpace.LOCAL):
                raise MissingDefinitionError("temporary variable '%s' gets used "
                        "in subkernel '%s' without a definition (maybe you forgot "
                        "to call loopy.save_and_reload_temporaries?)"
                        % (temporary, subkernel))