def get_function_declaration(
        self, codegen_state, codegen_result, schedule_index):
    """Build the function declaration for an OpenCL kernel.

    The declaration produced by the parent class is returned untouched for
    non-entrypoint code. For entrypoints, the wrapped declaration is
    re-wrapped in :class:`cgen.opencl.CLKernel` and, if the local sizes of
    the block at *schedule_index* have no dependencies, additionally in
    :class:`cgen.opencl.CLRequiredWorkGroupSize`.

    :returns: a :class:`loopy.target.c.FunctionDeclarationWrapper`.
    """
    base_decl = super().get_function_declaration(
        codegen_state, codegen_result, schedule_index)

    from loopy.target.c import FunctionDeclarationWrapper
    assert isinstance(base_decl, FunctionDeclarationWrapper)

    if not codegen_state.is_entrypoint:
        # Auxiliary (non-entrypoint) kernels do not need OpenCL-specific
        # qualifiers in their function signature.
        return base_decl

    inner_decl = base_decl.subdecl

    from cgen.opencl import CLKernel, CLRequiredWorkGroupSize
    kernel_decl = CLKernel(inner_decl)

    from loopy.schedule import get_insn_ids_for_block_at
    knl = codegen_state.kernel
    block_insn_ids = get_insn_ids_for_block_at(
        knl.linearization, schedule_index)
    # Only the second element of the returned pair (the local sizes) is
    # used here.
    _, local_sizes = knl.get_grid_sizes_for_insn_ids_as_exprs(
        block_insn_ids, codegen_state.callables_table)

    from loopy.symbolic import get_dependencies
    if get_dependencies(local_sizes):
        # A static work-group size cannot depend on parameters, so the
        # required-size wrapper is left off in that case.
        return FunctionDeclarationWrapper(kernel_decl)

    return FunctionDeclarationWrapper(
        CLRequiredWorkGroupSize(local_sizes, kernel_decl))
def get_function_declaration(
        self, codegen_state, codegen_result, schedule_index):
    """Build the function declaration for an OpenCL kernel.

    The wrapped declaration produced by the parent class is re-wrapped in
    :class:`cgen.opencl.CLKernel` and, if the local sizes of the block at
    *schedule_index* have no dependencies, additionally in
    :class:`cgen.opencl.CLRequiredWorkGroupSize`.

    :returns: a :class:`loopy.target.c.FunctionDeclarationWrapper`.
    """
    base_decl = super(OpenCLCASTBuilder, self).get_function_declaration(
        codegen_state, codegen_result, schedule_index)

    from loopy.target.c import FunctionDeclarationWrapper
    assert isinstance(base_decl, FunctionDeclarationWrapper)

    inner_decl = base_decl.subdecl

    from cgen.opencl import CLKernel, CLRequiredWorkGroupSize
    kernel_decl = CLKernel(inner_decl)

    from loopy.schedule import get_insn_ids_for_block_at
    knl = codegen_state.kernel
    block_insn_ids = get_insn_ids_for_block_at(knl.schedule, schedule_index)
    # Only the second element of the returned pair (the local sizes) is
    # used here.
    _, local_sizes = knl.get_grid_sizes_for_insn_ids_as_exprs(block_insn_ids)

    from loopy.symbolic import get_dependencies
    if get_dependencies(local_sizes):
        # A static work-group size cannot depend on parameters, so the
        # required-size wrapper is left off in that case.
        return FunctionDeclarationWrapper(kernel_decl)

    return FunctionDeclarationWrapper(
        CLRequiredWorkGroupSize(local_sizes, kernel_decl))
b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b) c_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes) from cgen import FunctionBody, \ FunctionDeclaration, POD, Value, \ Pointer, Module, Block, Initializer, Assign, Const from cgen.opencl import CLKernel, CLGlobal, \ CLRequiredWorkGroupSize mod = Module([ FunctionBody( CLKernel( CLRequiredWorkGroupSize( (local_size, ), FunctionDeclaration(Value("void", "add"), arg_decls=[ CLGlobal( Pointer(Const(POD(dtype, name)))) for name in ["tgt", "op1", "op2"] ]))), Block([ Initializer( POD(numpy.int32, "idx"), "get_local_id(0) + %d * get_group_id(0)" % (local_size * thread_strides)) ] + [ Assign( "tgt[idx+%d]" % (o * local_size), "op1[idx+%d] + op2[idx+%d]" % (o * local_size, o * local_size)) for o in range(thread_strides) ]))