Exemplo n.º 1
0
    def get_function_declaration(self, codegen_state, codegen_result,
            schedule_index):
        fdecl = super().get_function_declaration(
                codegen_state, codegen_result, schedule_index)

        from loopy.target.c import FunctionDeclarationWrapper
        assert isinstance(fdecl, FunctionDeclarationWrapper)
        if not codegen_state.is_entrypoint:
            # auxiliary kernels need not mention opencl speicific qualifiers
            # for a functions signature
            return fdecl

        fdecl = fdecl.subdecl

        from cgen.opencl import CLKernel, CLRequiredWorkGroupSize
        fdecl = CLKernel(fdecl)

        from loopy.schedule import get_insn_ids_for_block_at
        _, local_sizes = codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs(
                get_insn_ids_for_block_at(
                    codegen_state.kernel.linearization, schedule_index),
                codegen_state.callables_table)

        from loopy.symbolic import get_dependencies
        if not get_dependencies(local_sizes):
            # sizes can't have parameter dependencies if they are
            # to be used in static WG size.

            fdecl = CLRequiredWorkGroupSize(local_sizes, fdecl)

        return FunctionDeclarationWrapper(fdecl)
Exemplo n.º 2
0
    def get_function_declaration(self, codegen_state, codegen_result,
                                 schedule_index):
        fdecl = super(OpenCLCASTBuilder,
                      self).get_function_declaration(codegen_state,
                                                     codegen_result,
                                                     schedule_index)

        from loopy.target.c import FunctionDeclarationWrapper
        assert isinstance(fdecl, FunctionDeclarationWrapper)
        fdecl = fdecl.subdecl

        from cgen.opencl import CLKernel, CLRequiredWorkGroupSize
        fdecl = CLKernel(fdecl)

        from loopy.schedule import get_insn_ids_for_block_at
        _, local_sizes = codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs(
            get_insn_ids_for_block_at(codegen_state.kernel.schedule,
                                      schedule_index))

        from loopy.symbolic import get_dependencies
        if not get_dependencies(local_sizes):
            # sizes can't have parameter dependencies if they are
            # to be used in static WG size.

            fdecl = CLRequiredWorkGroupSize(local_sizes, fdecl)

        return FunctionDeclarationWrapper(fdecl)
Exemplo n.º 3
0
b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
c_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)

from cgen import FunctionBody, \
        FunctionDeclaration, POD, Value, \
        Pointer, Module, Block, Initializer, Assign, Const
from cgen.opencl import CLKernel, CLGlobal, \
        CLRequiredWorkGroupSize

mod = Module([
    FunctionBody(
        CLKernel(
            CLRequiredWorkGroupSize(
                (local_size, ),
                FunctionDeclaration(Value("void", "add"),
                                    arg_decls=[
                                        CLGlobal(
                                            Pointer(Const(POD(dtype, name))))
                                        for name in ["tgt", "op1", "op2"]
                                    ]))),
        Block([
            Initializer(
                POD(numpy.int32,
                    "idx"), "get_local_id(0) + %d * get_group_id(0)" %
                (local_size * thread_strides))
        ] + [
            Assign(
                "tgt[idx+%d]" % (o * local_size), "op1[idx+%d] + op2[idx+%d]" %
                (o * local_size, o * local_size))
            for o in range(thread_strides)
        ]))