示例#1
0
    def __call__(self, kernel, codegen_result):
        """
        Build the python wrapper function that invokes *kernel* on this
        execution target.

        :arg kernel: the loopy :class:`LoopKernel` to be executed
        :arg codegen_result: the loopy :class:`CodeGenerationResult`
            created by code generation

        :returns: the python callable assembled by the function generator,
            which handles execution of this kernel
        """

        opts = kernel.options
        data_info = codegen_result.implemented_data_info

        from loopy.kernel.data import KernelArgument

        wrapper_name = "invoke_%s_loopy_kernel" % kernel.name
        system_params = self.system_args

        # Each entry whose arg_class derives from KernelArgument contributes
        # a ``<name>=None`` parameter to the generated function.
        kernel_arg_params = []
        for info in data_info:
            if issubclass(info.arg_class, KernelArgument):
                kernel_arg_params.append("%s=None" % info.name)

        gen = PythonFunctionGenerator(
            wrapper_name, system_params + kernel_arg_params)

        # Assemble the preamble; empty lines separate its sections.
        gen.add_to_preamble("from __future__ import division")
        gen.add_to_preamble("")
        self.target_specific_preamble(gen)
        gen.add_to_preamble("")
        self.generate_host_code(gen, codegen_result)
        gen.add_to_preamble("")

        self.initialize_system_args(gen)

        # Emit the argument-finding and argument-checking steps, in order.
        self.generate_integer_arg_finding_from_shapes(gen, kernel, data_info)
        self.generate_integer_arg_finding_from_offsets(gen, kernel, data_info)
        self.generate_integer_arg_finding_from_strides(gen, kernel, data_info)
        self.generate_value_arg_check(gen, kernel, data_info)

        invocation_args = self.generate_arg_setup(
            gen, kernel, data_info, opts)

        self.generate_invocation(
            gen, codegen_result.host_program.name, invocation_args,
            kernel, data_info)

        self.generate_output_handler(gen, opts, kernel, data_info)

        # Optionally dump the generated wrapper source: ``True`` prints it,
        # any other truthy value is used as the path of the file to write.
        wrapper_target = opts.write_wrapper
        if wrapper_target:
            source = gen.get()
            if opts.highlight_wrapper:
                source = get_highlighted_python_code(source)

            if wrapper_target is True:
                print(source)
            else:
                with open(wrapper_target, "w") as outf:
                    outf.write(source)

        return gen.get_picklable_function()
示例#2
0
    def _cache_kernel_stats(self, t_unit: lp.TranslationUnit, kwargs: dict) \
      -> tuple:
        """Generate and cache the kernel stats for a program with its args.

        :arg t_unit: the translation unit whose default entrypoint is
            analyzed.
        :arg kwargs: the keyword arguments of the kernel invocation.
        :returns: the key under which the stats are stored in
            ``self.kernel_stats[t_unit]``: a tuple holding ``(name, shape)``
            for every argument that has a ``shape`` attribute and
            ``(name, value)`` for every other argument.
        """
        args_tuple = tuple(
            (key, value.shape) if hasattr(value, "shape") else (key, value)
            for key, value in kwargs.items())

        # Are kernel stats already in the cache? Only the lookup is guarded,
        # so that errors raised while computing the stats below are not
        # reported as happening "during handling of" this KeyError.
        try:
            self.kernel_stats[t_unit][args_tuple]
        except KeyError:
            pass  # cache miss: calculate and cache the stats below
        else:
            return args_tuple

        ep_name = t_unit.default_entrypoint.name
        executor = t_unit.target.get_kernel_executor(t_unit,
                                                     self.queue,
                                                     entrypoint=ep_name)

        # Both executor queries take the same dtype set; compute it once.
        arg_dtypes = executor.arg_to_dtype_set(kwargs)
        info = executor.translation_unit_info(ep_name, arg_dtypes)
        typed_t_unit = executor.get_typed_and_scheduled_translation_unit(
            ep_name, arg_dtypes)
        kernel = typed_t_unit[ep_name]

        idi = info.implemented_data_info

        # Every kernel argument and implemented-data entry that the caller
        # did not supply is passed to the generated function as None.
        param_dict = kwargs.copy()
        for arg_name in kernel.arg_dict:
            param_dict.setdefault(arg_name, None)
        for data in idi:
            param_dict.setdefault(data.name, None)

        # Generate the wrapper code
        wrapper = executor.get_wrapper_generator()

        gen = PythonFunctionGenerator("_mcom_gen_args_profile",
                                      list(param_dict))

        wrapper.generate_integer_arg_finding_from_shapes(gen, kernel, idi)
        wrapper.generate_integer_arg_finding_from_offsets(gen, kernel, idi)
        wrapper.generate_integer_arg_finding_from_strides(gen, kernel, idi)

        # The generated function returns a dict mapping each kernel
        # parameter name to its value.
        param_names = kernel.all_params()
        gen("return {%s}" % ", ".join(f"{repr(name)}: {name}"
                                      for name in param_names))

        # Run the wrapper code, save argument values in domain_params
        domain_params = gen.get_picklable_function()(**param_dict)

        # Get flops/memory statistics
        op_map = lp.get_op_map(typed_t_unit, subgroup_size="guess")
        bytes_accessed = lp.get_mem_access_map(
            typed_t_unit, subgroup_size="guess") \
                        .to_bytes().eval_and_sum(domain_params)

        flops = op_map.filter_by(
            dtype=[np.float32, np.float64]).eval_and_sum(domain_params)

        # Footprint gathering is not yet available in loopy with
        # kernel callables:
        # https://github.com/inducer/loopy/issues/399
        # The branch is deliberately disabled and kept for when it is.
        if 0:
            try:
                footprint = lp.gather_access_footprint_bytes(typed_t_unit)
                footprint_bytes = sum(
                    footprint[k].eval_with_dict(domain_params)
                    for k in footprint)

            except lp.symbolic.UnableToDetermineAccessRange:
                footprint_bytes = None
        else:
            footprint_bytes = None

        res = SingleCallKernelProfile(time=0,
                                      flops=flops,
                                      bytes_accessed=bytes_accessed,
                                      footprint_bytes=footprint_bytes)

        self.kernel_stats.setdefault(t_unit, {})[args_tuple] = res

        # Add a KernelProfile quantity for this entrypoint to the log
        # manager unless "<ep_name>_time" is already among its quantities.
        if self.logmgr \
                and f"{ep_name}_time" not in self.logmgr.quantity_data:
            self.logmgr.add_quantity(KernelProfile(self, ep_name))

        return args_tuple
示例#3
0
    def __call__(self, kernel, codegen_result):
        """
        Generate the python invoker that wraps *kernel* for this execution
        target.

        :arg kernel: the loopy :class:`LoopKernel` to be executed
        :arg codegen_result: the loopy :class:`CodeGenerationResult`
            created by code generation

        :returns: A python callable that handles execution of this
            kernel
        """

        options = kernel.options
        idi = codegen_result.implemented_data_info

        from loopy.kernel.data import KernelArgument

        # The invoker takes the system arguments followed by one
        # ``<name>=None`` parameter per KernelArgument-derived entry.
        invoker_name = "invoke_%s_loopy_kernel" % kernel.name
        invoker_params = self.system_args + [
            "%s=None" % arg_info.name
            for arg_info in idi
            if issubclass(arg_info.arg_class, KernelArgument)]
        gen = PythonFunctionGenerator(invoker_name, invoker_params)

        # Preamble sections, each followed by an empty line.
        gen.add_to_preamble("from __future__ import division")
        gen.add_to_preamble("")

        self.target_specific_preamble(gen)
        gen.add_to_preamble("")

        self.generate_host_code(gen, codegen_result)
        gen.add_to_preamble("")

        self.initialize_system_args(gen)

        self.generate_integer_arg_finding_from_shapes(gen, kernel, idi)
        self.generate_integer_arg_finding_from_offsets(gen, kernel, idi)
        self.generate_integer_arg_finding_from_strides(gen, kernel, idi)
        self.generate_value_arg_check(gen, kernel, idi)

        call_args = self.generate_arg_setup(gen, kernel, idi, options)

        host_program_name = codegen_result.host_program.name
        self.generate_invocation(
            gen, host_program_name, call_args, kernel, idi)

        self.generate_output_handler(gen, options, kernel, idi)

        # Nothing to dump: hand back the callable right away.
        if not options.write_wrapper:
            return gen.get_picklable_function()

        wrapper_code = gen.get()
        if options.highlight_wrapper:
            wrapper_code = get_highlighted_python_code(wrapper_code)

        # ``write_wrapper`` is either literally True (print the code) or a
        # value passed to open() as the file to write the code to.
        if options.write_wrapper is not True:
            with open(options.write_wrapper, "w") as outf:
                outf.write(wrapper_code)
        else:
            print(wrapper_code)

        return gen.get_picklable_function()
示例#4
0
    def _cache_kernel_stats(self, program: lp.kernel.LoopKernel, kwargs: dict) \
      -> tuple:
        """Generate and cache the kernel stats for a program with its args.

        :arg program: the kernel that is analyzed.
        :arg kwargs: the keyword arguments of the kernel invocation.
        :returns: the key under which the stats are stored in
            ``self.kernel_stats[program]``: a tuple holding
            ``(name, shape)`` for every argument that has a ``shape``
            attribute and ``(name, value)`` for every other argument.
        """
        args_tuple = tuple(
            (key, value.shape) if hasattr(value, "shape") else (key, value)
            for key, value in kwargs.items())

        # Are kernel stats already in the cache? Only the lookup is guarded,
        # so that errors raised while computing the stats below are not
        # reported as happening "during handling of" this KeyError.
        try:
            self.kernel_stats[program][args_tuple]
        except KeyError:
            pass  # cache miss: calculate and cache the stats below
        else:
            return args_tuple

        executor = program.target.get_kernel_executor(program, self.queue)

        # Both executor queries take the same dtype set; compute it once.
        arg_dtypes = executor.arg_to_dtype_set(kwargs)
        info = executor.kernel_info(arg_dtypes)
        kernel = executor.get_typed_and_scheduled_kernel(arg_dtypes)

        idi = info.implemented_data_info

        # Arguments that carry a non-object dtype.
        # NOTE(review): the values are the argument objects themselves, not
        # their dtypes; presumably lp.add_and_infer_dtypes resolves them via
        # their ``dtype`` attribute - confirm.
        types = {
            k: v
            for k, v in kwargs.items()
            if hasattr(v, "dtype") and v.dtype != object
        }

        # Every kernel argument and implemented-data entry that the caller
        # did not supply is passed to the generated function as None.
        param_dict = kwargs.copy()
        for arg_name in kernel.arg_dict:
            param_dict.setdefault(arg_name, None)
        for data in idi:
            param_dict.setdefault(data.name, None)

        # Generate the wrapper code
        wrapper = executor.get_wrapper_generator()

        gen = PythonFunctionGenerator("_mcom_gen_args_profile",
                                      list(param_dict))

        wrapper.generate_integer_arg_finding_from_shapes(gen, kernel, idi)
        wrapper.generate_integer_arg_finding_from_offsets(gen, kernel, idi)
        wrapper.generate_integer_arg_finding_from_strides(gen, kernel, idi)

        # The generated function returns a dict mapping each program
        # parameter name to its value.
        param_names = program.all_params()
        gen("return {%s}" % ", ".join(f"{repr(name)}: {name}"
                                      for name in param_names))

        # Run the wrapper code, save argument values in domain_params
        domain_params = gen.get_picklable_function()(**param_dict)

        # Get flops/memory statistics
        kernel = lp.add_and_infer_dtypes(kernel, types)
        op_map = lp.get_op_map(kernel, subgroup_size="guess")
        bytes_accessed = lp.get_mem_access_map(kernel, subgroup_size="guess") \
          .to_bytes().eval_and_sum(domain_params)

        flops = op_map.filter_by(
            dtype=[np.float32, np.float64]).eval_and_sum(domain_params)

        # The footprint is recorded as None when loopy cannot determine
        # the access range.
        try:
            footprint = lp.gather_access_footprint_bytes(kernel)
            footprint_bytes = sum(
                footprint[k].eval_with_dict(domain_params)
                for k in footprint)

        except lp.symbolic.UnableToDetermineAccessRange:
            footprint_bytes = None

        res = ProfileResult(time=0,
                            flops=flops,
                            bytes_accessed=bytes_accessed,
                            footprint_bytes=footprint_bytes)

        self.kernel_stats.setdefault(program, {})[args_tuple] = res
        return args_tuple