Exemplo n.º 1
0
    def get_function_declaration(self, codegen_state, codegen_result,
                                 schedule_index):
        """Return the OpenCL-decorated declaration of the generated function.

        The declaration produced by the parent class is unwrapped, marked as
        an OpenCL kernel via ``CLKernel`` and, if the local sizes of the
        schedule block at *schedule_index* have no dependencies, additionally
        wrapped in ``CLRequiredWorkGroupSize``. The result is handed back
        inside a fresh ``FunctionDeclarationWrapper``.
        """
        from loopy.target.c import FunctionDeclarationWrapper

        parent_decl = super(OpenCLCASTBuilder, self).get_function_declaration(
            codegen_state, codegen_result, schedule_index)
        assert isinstance(parent_decl, FunctionDeclarationWrapper)

        from cgen.opencl import CLKernel, CLRequiredWorkGroupSize
        kernel_decl = CLKernel(parent_decl.subdecl)

        from loopy.schedule import get_insn_ids_for_block_at
        from loopy.symbolic import get_dependencies

        kernel = codegen_state.kernel
        block_insn_ids = get_insn_ids_for_block_at(kernel.schedule,
                                                   schedule_index)
        _, wg_sizes = kernel.get_grid_sizes_for_insn_ids_as_exprs(
            block_insn_ids)

        if get_dependencies(wg_sizes):
            # Sizes that depend on parameters cannot be used as a static
            # work-group size, so the plain kernel declaration is returned.
            return FunctionDeclarationWrapper(kernel_decl)

        return FunctionDeclarationWrapper(
            CLRequiredWorkGroupSize(wg_sizes, kernel_decl))
Exemplo n.º 2
0
Arquivo: cuda.py Projeto: mmmika/loopy
    def get_function_declaration(self, codegen_state, codegen_result,
            schedule_index):
        """Return the CUDA-decorated declaration of the generated function.

        The declaration produced by the parent class is unwrapped and wrapped
        in ``CudaGlobal``; if ``self.target.extern_c`` is set it is further
        wrapped in ``Extern("C", ...)``. When the local grid size of the
        schedule block at *schedule_index* has no dependencies, the product
        of its entries is attached through ``CudaLaunchBounds``. The result
        is handed back inside a fresh ``FunctionDeclarationWrapper``.
        """
        from loopy.target.c import FunctionDeclarationWrapper

        parent_decl = super(CUDACASTBuilder, self).get_function_declaration(
                codegen_state, codegen_result, schedule_index)
        assert isinstance(parent_decl, FunctionDeclarationWrapper)

        from cgen.cuda import CudaGlobal, CudaLaunchBounds
        decl = CudaGlobal(parent_decl.subdecl)

        if self.target.extern_c:
            from cgen import Extern
            decl = Extern("C", decl)

        from loopy.schedule import get_insn_ids_for_block_at
        from loopy.symbolic import get_dependencies

        kernel = codegen_state.kernel
        block_insn_ids = get_insn_ids_for_block_at(
                kernel.schedule, schedule_index)
        _, block_dims = kernel.get_grid_sizes_for_insn_ids_as_exprs(
                block_insn_ids)

        if get_dependencies(block_dims):
            # Sizes that depend on parameters cannot be used as a static
            # thread block size, so no launch bounds are attached.
            return FunctionDeclarationWrapper(decl)

        from pytools import product
        thread_count = product(block_dims)
        return FunctionDeclarationWrapper(CudaLaunchBounds(thread_count, decl))
Exemplo n.º 3
0
    def get_function_declaration(self, codegen_state, codegen_result,
                                 schedule_index):
        """Return the ISPC declaration of the generated function.

        Builds a ``void`` function declaration named after the current
        program of *codegen_result*, taking the argument declarations from
        ``self._arg_names_and_decls``. The declaration is wrapped in
        ``ISPCTask`` when device code is being generated and in
        ``ISPCExport`` otherwise, then returned inside a
        ``FunctionDeclarationWrapper``. *schedule_index* is not used here.
        """
        program_name = codegen_result.current_program(codegen_state).name

        from cgen import FunctionDeclaration, Value
        from cgen.ispc import ISPCExport, ISPCTask
        from loopy.target.c import FunctionDeclarationWrapper

        # Only the declarations are needed; the argument names are discarded.
        _, arg_decls = self._arg_names_and_decls(codegen_state)

        if codegen_state.is_generating_device_code:
            qualifier = ISPCTask
        else:
            qualifier = ISPCExport

        signature = FunctionDeclaration(Value("void", program_name), arg_decls)
        return FunctionDeclarationWrapper(qualifier(signature))