def get_reduction_kernel(stage, ctx, dtype_out,
        neutral, reduce_expr, arguments=None,
        name="reduce_kernel", preamble="", map_exprs=None,
        device=None, options=None, max_group_size=None):
    """Generate, build and configure the kernel for one reduction stage.

    :arg stage: ``1`` or ``2``. For stage 1 the *arguments* are parsed with
        offsets and offset-adjusting code is generated. For stage 2 one
        ``pyopencl_reduction_inp_<i>`` vector argument of type *dtype_out*
        is prepended per map expression.
    :arg ctx: the context the program is created in.
    :arg dtype_out: dtype of the reduction result.
    :arg neutral: passed through to ``_get_reduction_source``.
    :arg reduce_expr: passed through to ``_get_reduction_source``.
    :arg arguments: argument declarations accepted by
        :func:`pyopencl.tools.parse_arg_list`.
    :arg name: name of the kernel inside the generated program.
    :arg preamble: passed through to ``_get_reduction_source``.
    :arg map_exprs: a sequence of map expressions. Entries that are *None*
        are replaced by a stage-dependent default. The sequence passed in
        by the caller is left unmodified.
    :arg device: passed through to ``_get_reduction_source``.
    :arg options: build options handed to ``Program.build``.
    :arg max_group_size: passed through to ``_get_reduction_source``.
    :returns: the object returned by ``_get_reduction_source`` with the
        attributes ``program``, ``kernel`` and ``arg_types`` set.
    :raises ValueError: if *map_exprs* is *None*.
    """
    if map_exprs is None:
        raise ValueError("map_exprs has to be given!")

    if options is None:
        options = []

    def default_map_expr(i):
        if stage == 2:
            return "pyopencl_reduction_inp_%i[i]" % i
        return "in[i]"

    # Build a new list instead of writing into the caller's sequence, so
    # that a list reused for another stage keeps its None placeholders.
    map_exprs = [
            default_map_expr(i) if m is None else m
            for i, m in enumerate(map_exprs)]

    from pyopencl.tools import (
            parse_arg_list, get_arg_list_scalar_arg_dtypes,
            get_arg_offset_adjuster_code, VectorArg)

    arg_prep = ""
    if stage == 1 and arguments is not None:
        arguments = parse_arg_list(arguments, with_offset=True)
        arg_prep = get_arg_offset_adjuster_code(arguments)

    if stage == 2 and arguments is not None:
        arguments = parse_arg_list(arguments)
        arguments = (
                [VectorArg(dtype_out, "pyopencl_reduction_inp_%i" % i)
                    for i in range(len(map_exprs))]
                + arguments)

    inf = _get_reduction_source(
            ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,
            neutral, reduce_expr, map_exprs, arguments,
            name, preamble, arg_prep, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)

    inf.arg_types = arguments

    # One non-scalar slot per map expression, then an int64, then the user
    # arguments, then two uint32 scalars. The meaning of the fixed slots is
    # defined by the generated kernel source, which is not visible here.
    inf.kernel.set_scalar_arg_dtypes(
            [None] * len(map_exprs) + [np.int64]
            + get_arg_list_scalar_arg_dtypes(inf.arg_types)
            + [np.uint32] * 2)

    return inf
def get_reduction_kernel(stage, ctx, dtype_out,
        neutral, reduce_expr, arguments=None,
        name="reduce_kernel", preamble="", map_exprs=None,
        device=None, options=None, max_group_size=None):
    """Generate, build and configure the kernel for one reduction stage.

    :arg stage: ``1`` or ``2``. For stage 1 the *arguments* are parsed with
        offsets and offset-adjusting code is generated. For stage 2 one
        ``pyopencl_reduction_inp_<i>`` vector argument of type *dtype_out*
        is prepended per map expression.
    :arg ctx: the context the program is created in.
    :arg dtype_out: dtype of the reduction result.
    :arg arguments: argument declarations accepted by
        :func:`pyopencl.tools.parse_arg_list`.
    :arg name: name of the kernel inside the generated program.
    :arg map_exprs: a sequence of map expressions. Entries that are *None*
        are replaced by a stage-dependent default. The caller's sequence
        is not modified.
    :arg options: build options handed to ``Program.build``.

    *neutral*, *reduce_expr*, *preamble*, *device* and *max_group_size* are
    passed through unchanged to ``_get_reduction_source``.

    :returns: the object returned by ``_get_reduction_source`` with the
        attributes ``program``, ``kernel`` and ``arg_types`` set.
    :raises ValueError: if *map_exprs* is *None*.
    """
    if map_exprs is None:
        raise ValueError("map_exprs has to be given!")

    if options is None:
        options = []

    # Fill in defaults on a copy; the caller's list keeps its None entries.
    map_exprs = list(map_exprs)
    for i, m in enumerate(map_exprs):
        if m is None:
            if stage == 2:
                map_exprs[i] = "pyopencl_reduction_inp_%i[i]" % i
            else:
                map_exprs[i] = "in[i]"

    from pyopencl.tools import (
            parse_arg_list, get_arg_list_scalar_arg_dtypes,
            get_arg_offset_adjuster_code, VectorArg)

    arg_prep = ""
    if stage == 1 and arguments is not None:
        arguments = parse_arg_list(arguments, with_offset=True)
        arg_prep = get_arg_offset_adjuster_code(arguments)

    if stage == 2 and arguments is not None:
        arguments = parse_arg_list(arguments)
        # range() rather than xrange(): the latter does not exist on
        # Python 3, and range() works on both major versions.
        arguments = (
                [VectorArg(dtype_out, "pyopencl_reduction_inp_%i" % i)
                    for i in range(len(map_exprs))]
                + arguments)

    inf = _get_reduction_source(
            ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,
            neutral, reduce_expr, map_exprs, arguments,
            name, preamble, arg_prep, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)

    inf.arg_types = arguments

    # One non-scalar slot per map expression, then an int64, then the user
    # arguments, then two uint32 scalars (layout fixed by the generated
    # kernel source, which is not visible here).
    inf.kernel.set_scalar_arg_dtypes(
            [None] * len(map_exprs) + [np.int64]
            + get_arg_list_scalar_arg_dtypes(inf.arg_types)
            + [np.uint32] * 2)

    return inf
def get_reduction_kernel(stage, ctx, dtype_out,
        neutral, reduce_expr, map_expr=None, arguments=None,
        name="reduce_kernel", preamble="",
        device=None, options=[], max_group_size=None):
    """Generate, build and configure the kernel for one reduction stage.

    When *map_expr* is *None*, stage 2 reads from ``pyopencl_reduction_inp``
    and any other stage reads from ``in``. Stage 1 parses *arguments* with
    offsets and generates offset-adjusting code; stage 2 parses them without
    offsets and prepends a ``pyopencl_reduction_inp`` vector argument of
    type *dtype_out*.

    :returns: the object returned by ``_get_reduction_source`` with the
        attributes ``program``, ``kernel`` and ``arg_types`` set.
    """
    if map_expr is None:
        map_expr = "pyopencl_reduction_inp[i]" if stage == 2 else "in[i]"

    from pyopencl.tools import (
            parse_arg_list, get_arg_list_scalar_arg_dtypes,
            get_arg_offset_adjuster_code, VectorArg)

    arg_prep = ""
    if arguments is not None:
        if stage == 1:
            arguments = parse_arg_list(arguments, with_offset=True)
            arg_prep = get_arg_offset_adjuster_code(arguments)
        elif stage == 2:
            parsed = parse_arg_list(arguments)
            arguments = [VectorArg(dtype_out, "pyopencl_reduction_inp")] + parsed

    out_ctype = dtype_to_ctype(dtype_out)
    inf = _get_reduction_source(
            ctx, out_ctype, dtype_out.itemsize,
            neutral, reduce_expr, map_expr, arguments,
            name, preamble, arg_prep, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)
    inf.arg_types = arguments

    # Fixed slots surrounding the user-supplied arguments.
    leading = [None, np.int64]
    user = get_arg_list_scalar_arg_dtypes(inf.arg_types)
    trailing = [np.int64] * 3 + [np.uint32, np.int64]
    inf.kernel.set_scalar_arg_dtypes(leading + user + trailing)

    return inf
def get_elwise_kernel_and_types(context, arguments, operation,
        name="elwise_kernel", options=[], preamble="", use_range=False,
        **kwargs):
    """Build an elementwise kernel and return it with its argument list.

    *arguments* is parsed with :func:`pyopencl.tools.parse_arg_list`.
    Unless ``auto_preamble=False`` is passed in *kwargs*, a double-precision
    pragma is added when any argument is ``float64``/``complex128``, and the
    complex header is included when any argument has a complex dtype.
    Either ``start``/``stop``/``step`` (if *use_range*) or ``n`` is appended
    to the argument list as ``np.intp`` scalars.

    :returns: a tuple ``(kernel, parsed_args)``.
    """
    from pyopencl.tools import parse_arg_list, get_arg_list_scalar_arg_dtypes

    parsed_args = parse_arg_list(arguments)
    auto_preamble = kwargs.pop("auto_preamble", True)

    pragmas = []
    includes = []
    if auto_preamble:
        for arg in parsed_args:
            is_double = arg.dtype in [np.float64, np.complex128]
            if is_double and not pragmas:
                pragmas.append(
                        "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
                        "#define PYOPENCL_DEFINE_CDOUBLE\n")

            is_complex = arg.dtype.kind == 'c'
            if is_complex and not includes:
                includes.append("#include <pyopencl-complex.h>\n")

    auto_lines = pragmas + includes
    if auto_lines:
        preamble = "\n".join(auto_lines) + "\n" + preamble

    if use_range:
        size_arg_names = ("start", "stop", "step")
    else:
        size_arg_names = ("n",)
    parsed_args.extend(
            ScalarArg(np.intp, arg_name) for arg_name in size_arg_names)

    prg = get_elwise_program(
            context, parsed_args, operation,
            name=name, options=options, preamble=preamble,
            use_range=use_range, **kwargs)

    kernel = getattr(prg, name)
    scalar_dtypes = get_arg_list_scalar_arg_dtypes(parsed_args)
    kernel.set_scalar_arg_dtypes(scalar_dtypes)

    return kernel, parsed_args
def get_reduction_kernel(stage, ctx, dtype_out,
        neutral, reduce_expr, map_expr=None, arguments=None,
        name="reduce_kernel", preamble="",
        device=None, options=[], max_group_size=None):
    """Create the compiled kernel for reduction stage *stage*.

    A missing *map_expr* defaults to ``pyopencl_reduction_inp[i]`` in
    stage 2 and to ``in[i]`` otherwise. In stage 1, *arguments* are parsed
    with offsets and matching offset-adjusting code is generated. In stage 2
    they are parsed without offsets and preceded by a
    ``pyopencl_reduction_inp`` vector argument of type *dtype_out*.

    :returns: the object produced by ``_get_reduction_source``, extended
        with ``program``, ``kernel`` and ``arg_types`` attributes.
    """
    second_stage = stage == 2

    if map_expr is None:
        if second_stage:
            map_expr = "pyopencl_reduction_inp[i]"
        else:
            map_expr = "in[i]"

    from pyopencl.tools import (
            parse_arg_list, get_arg_list_scalar_arg_dtypes,
            get_arg_offset_adjuster_code, VectorArg)

    have_arguments = arguments is not None
    arg_prep = ""

    if have_arguments and stage == 1:
        arguments = parse_arg_list(arguments, with_offset=True)
        arg_prep = get_arg_offset_adjuster_code(arguments)
    elif have_arguments and second_stage:
        arguments = parse_arg_list(arguments)
        stage_input = VectorArg(dtype_out, "pyopencl_reduction_inp")
        arguments = [stage_input] + arguments

    inf = _get_reduction_source(
            ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,
            neutral, reduce_expr, map_expr, arguments,
            name, preamble, arg_prep, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)
    inf.arg_types = arguments

    user_scalar_dtypes = get_arg_list_scalar_arg_dtypes(inf.arg_types)
    inf.kernel.set_scalar_arg_dtypes(
            [None, np.int64]
            + user_scalar_dtypes
            + [np.int64, np.int64, np.int64]
            + [np.uint32, np.int64])

    return inf
def get_elwise_kernel_and_types(context, arguments, operation,
        name="elwise_kernel", options=[], preamble="", use_range=False,
        **kwargs):
    """Build an offset-aware elementwise kernel and return it with its
    argument list.

    *arguments* is parsed with offsets enabled. Unless
    ``auto_preamble=False`` is passed in *kwargs*, a guarded
    double-precision pragma is added when any argument is
    ``float64``/``complex128``, and the complex header is included when any
    argument has a complex dtype. Either ``start``/``stop``/``step`` (if
    *use_range*) or ``n`` is appended as ``np.intp`` scalars. The
    offset-adjusting code for the final argument list is placed in front of
    any ``loop_prep`` given in *kwargs*.

    :returns: a tuple ``(kernel, parsed_args)``.
    """
    from pyopencl.tools import (
            parse_arg_list, get_arg_offset_adjuster_code,
            get_arg_list_scalar_arg_dtypes)

    parsed_args = parse_arg_list(arguments, with_offset=True)
    auto_preamble = kwargs.pop("auto_preamble", True)

    pragmas = []
    includes = []
    if auto_preamble:
        for arg in parsed_args:
            is_double = arg.dtype in [np.float64, np.complex128]
            if is_double and not pragmas:
                pragmas.append("""
                        #if __OPENCL_C_VERSION__ < 120
                        #pragma OPENCL EXTENSION cl_khr_fp64: enable
                        #endif
                        #define PYOPENCL_DEFINE_CDOUBLE
                        """)

            is_complex = arg.dtype.kind == 'c'
            if is_complex and not includes:
                includes.append("#include <pyopencl-complex.h>\n")

    auto_lines = pragmas + includes
    if auto_lines:
        preamble = "\n".join(auto_lines) + "\n" + preamble

    if use_range:
        size_arg_names = ("start", "stop", "step")
    else:
        size_arg_names = ("n",)
    parsed_args.extend(
            ScalarArg(np.intp, arg_name) for arg_name in size_arg_names)

    user_loop_prep = kwargs.pop("loop_prep", "")
    loop_prep = get_arg_offset_adjuster_code(parsed_args) + user_loop_prep

    prg = get_elwise_program(
            context, parsed_args, operation,
            name=name, options=options, preamble=preamble,
            use_range=use_range, loop_prep=loop_prep, **kwargs)

    kernel = getattr(prg, name)
    scalar_dtypes = get_arg_list_scalar_arg_dtypes(parsed_args)
    kernel.set_scalar_arg_dtypes(scalar_dtypes)

    return kernel, parsed_args
def __init__(self, ctx, dtype_out,
        neutral, reduce_expr, map_expr=None, arguments=None,
        name="reduce_kernel", options=None, preamble=""):
    """Build the stage 1 and stage 2 reduction kernels.

    The stage 1 kernel is rebuilt with a smaller ``max_group_size`` for as
    long as its group size exceeds the work group size the built kernel
    reports for ``ctx.devices[0]``. The limit found that way is reused for
    the stage 2 kernel.
    """
    from pyopencl.tools import parse_arg_list
    arguments = parse_arg_list(arguments, with_offset=True)

    self.dtype_out = np.dtype(dtype_out)
    dtype_out = self.dtype_out

    group_size_limit = None
    rebuilds = 0

    while True:
        stage_1 = get_reduction_kernel(
                1, ctx, dtype_out,
                neutral, reduce_expr, map_expr, arguments,
                name=name + "_stage1", options=options, preamble=preamble,
                max_group_size=group_size_limit)
        self.stage_1_inf = stage_1

        kernel_max_wg_size = stage_1.kernel.get_work_group_info(
                cl.kernel_work_group_info.WORK_GROUP_SIZE,
                ctx.devices[0])

        if stage_1.group_size <= kernel_max_wg_size:
            break

        # Too large for this kernel on this device: retry with the
        # reported limit.
        group_size_limit = kernel_max_wg_size
        rebuilds += 1
        assert rebuilds <= 2

    self.stage_2_inf = get_reduction_kernel(
            2, ctx, dtype_out,
            neutral, reduce_expr, arguments=arguments,
            name=name + "_stage2", options=options, preamble=preamble,
            max_group_size=group_size_limit)
def get_reduction_kernel(stage, ctx, dtype_out,
        neutral, reduce_expr, map_expr=None, arguments=None,
        name="reduce_kernel", preamble="",
        device=None, options=None, max_group_size=None):
    """Generate and build the kernel for one reduction stage.

    A missing *map_expr* defaults to ``pyopencl_reduction_inp[i]`` in
    stage 2 and to ``in[i]`` otherwise. *arguments* are always parsed with
    offsets. In stage 2 a ``pyopencl_reduction_inp`` vector argument of type
    *dtype_out* is put in front of them after the offset-adjusting code has
    been generated.

    :returns: a ``_ReductionInfo`` describing the built kernel.
    :raises ValueError: if *arguments* is *None*.
    """
    if map_expr is None:
        map_expr = "pyopencl_reduction_inp[i]" if stage == 2 else "in[i]"

    from pyopencl.tools import (
            parse_arg_list, get_arg_list_scalar_arg_dtypes,
            get_arg_offset_adjuster_code, VectorArg)

    if arguments is None:
        raise ValueError("arguments must not be None")

    arguments = parse_arg_list(arguments, with_offset=True)
    arg_prep = get_arg_offset_adjuster_code(arguments)

    if stage == 2:
        stage_input = VectorArg(dtype_out, "pyopencl_reduction_inp")
        arguments = [stage_input] + arguments

    source, group_size = _get_reduction_source(
            ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,
            neutral, reduce_expr, map_expr, arguments,
            name, preamble, arg_prep, device, max_group_size)

    program = cl.Program(ctx, source)
    program.build(options)
    kernel = getattr(program, name)

    leading = [None, np.int64]
    user = get_arg_list_scalar_arg_dtypes(arguments)
    trailing = [np.int64] * 3 + [np.uint32, np.int64]
    kernel.set_scalar_arg_dtypes(leading + user + trailing)

    return _ReductionInfo(
            context=ctx,
            source=source,
            group_size=group_size,
            program=program,
            kernel=kernel,
            arg_types=arguments)
def get_reduction_kernel(stage, ctx, out_type, out_type_size,
        neutral, reduce_expr, map_expr=None, arguments=None,
        name="reduce_kernel", preamble="",
        device=None, options=[], max_group_size=None):
    """Generate, build and configure the kernel for one reduction stage.

    *out_type* is the C type name of the result. A missing *map_expr*
    defaults to ``pyopencl_reduction_inp[i]`` in stage 2 and to ``in[i]``
    otherwise. In stage 2 a ``const <out_type> *pyopencl_reduction_inp``
    declaration is placed in front of the *arguments* string before it is
    parsed.

    :returns: the object returned by ``_get_reduction_source`` with the
        attributes ``program``, ``kernel`` and ``arg_types`` set.
    """
    second_stage = stage == 2

    if map_expr is None:
        map_expr = "pyopencl_reduction_inp[i]" if second_stage else "in[i]"

    if second_stage:
        in_arg = "const %s *pyopencl_reduction_inp" % out_type
        arguments = in_arg + ", " + arguments if arguments else in_arg

    from pyopencl.tools import parse_arg_list, get_arg_list_scalar_arg_dtypes
    parsed_args = parse_arg_list(arguments)

    inf = _get_reduction_source(
            ctx, out_type, out_type_size,
            neutral, reduce_expr, map_expr, parsed_args,
            name, preamble, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)
    inf.arg_types = parsed_args

    user = get_arg_list_scalar_arg_dtypes(inf.arg_types)
    inf.kernel.set_scalar_arg_dtypes([None] + user + [np.uint32] * 2)

    return inf
def get_reduction_kernel(stage, ctx, out_type, out_type_size,
        neutral, reduce_expr, map_expr=None, arguments=None,
        name="reduce_kernel", preamble="",
        device=None, options=[], max_group_size=None):
    """Create the compiled kernel for reduction stage *stage*.

    *out_type* names the C result type. When *map_expr* is not given,
    stage 2 maps ``pyopencl_reduction_inp[i]`` and every other stage maps
    ``in[i]``. Stage 2 additionally declares
    ``const <out_type> *pyopencl_reduction_inp`` ahead of the caller's
    *arguments* string.

    :returns: the object produced by ``_get_reduction_source``, extended
        with ``program``, ``kernel`` and ``arg_types`` attributes.
    """
    if map_expr is None:
        if stage == 2:
            map_expr = "pyopencl_reduction_inp[i]"
        else:
            map_expr = "in[i]"

    if stage == 2:
        in_arg = "const %s *pyopencl_reduction_inp" % out_type
        if not arguments:
            arguments = in_arg
        else:
            arguments = in_arg + ", " + arguments

    from pyopencl.tools import parse_arg_list, get_arg_list_scalar_arg_dtypes
    parsed_args = parse_arg_list(arguments)

    inf = _get_reduction_source(
            ctx, out_type, out_type_size,
            neutral, reduce_expr, map_expr, parsed_args,
            name, preamble, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)
    inf.arg_types = parsed_args

    scalar_dtypes = (
            [None]
            + get_arg_list_scalar_arg_dtypes(inf.arg_types)
            + [np.uint32, np.uint32])
    inf.kernel.set_scalar_arg_dtypes(scalar_dtypes)

    return inf
def __init__(self, context, list_names_and_dtypes, generate_template,
        arg_decls, count_sharing=None, devices=None,
        name_prefix="plb_build_list", options=None, preamble="",
        debug=False, complex_kernel=False):
    """
    :arg context: A :class:`pyopencl.Context`.
    :arg list_names_and_dtypes: a list of `(name, dtype)` tuples
        indicating the lists to be built.
    :arg generate_template: a snippet of C as described below
    :arg arg_decls: A string of comma-separated C argument declarations.
    :arg count_sharing: A mapping consisting of `(child, mother)`
        indicating that `mother` and `child` will always have the
        same number of indices, and the `APPEND` to `mother`
        will always happen *before* the `APPEND` to the child.
        Defaults to an empty mapping.
    :arg devices: the devices to use. Defaults to ``context.devices``.
    :arg name_prefix: the name prefix to use for the compiled kernels
    :arg options: OpenCL compilation options for kernels using
        *generate_template*. Defaults to a new empty list.
    :arg preamble: stored as ``self.preamble``.
    :arg debug: stored as ``self.debug``.
    :arg complex_kernel: If `True`, prevents vectorization on CPUs.

    *generate_template* may use the following C macros/identifiers:

    * `index_type`: expands to C identifier for the index type used
      for the calculation
    * `USER_ARG_DECL`: expands to the C declarator for `arg_decls`
    * `USER_ARGS`: a list of C argument values corresponding to
      `user_arg_decl`
    * `LIST_ARG_DECL`: expands to a C argument list representing the
      data for the output lists. These are escaped prefixed with
      `"plg_"` so as to not interfere with user-provided names.
    * `LIST_ARGS`: a list of C argument values corresponding to
      `LIST_ARG_DECL`
    * `APPEND_name(entry)`: inserts `entry` into the list `name`.
      *entry* must be a valid C expression of the correct type.

    All argument-list related macros have a trailing comma included
    if they are non-empty.

    *generate_template* must supply a function:

    .. code-block:: c

        void generate(USER_ARG_DECL LIST_ARG_DECL index_type i)
        {
            APPEND_mylist(5);
        }

    Internally, the `kernel_template` is expanded (at least) twice. Once,
    for a 'counting' stage where the size of all the lists is determined,
    and a second time, for a 'generation' stage where the lists are
    actually filled. A `generate` function that has side effects beyond
    calling `append` is therefore ill-formed.
    """
    if devices is None:
        devices = context.devices

    if count_sharing is None:
        count_sharing = {}

    # A list literal as the default would be one object shared by every
    # instance through self.options, so create a new list per instance.
    if options is None:
        options = []

    self.context = context
    self.devices = devices

    self.list_names_and_dtypes = list_names_and_dtypes
    self.generate_template = generate_template

    from pyopencl.tools import parse_arg_list
    self.arg_decls = parse_arg_list(arg_decls)

    self.count_sharing = count_sharing

    self.name_prefix = name_prefix
    self.preamble = preamble
    self.options = options

    self.debug = debug

    self.complex_kernel = complex_kernel
def __init__(self, context, arguments, key_expr, sort_arg_names,
        bits_at_a_time=2, index_dtype=np.int32, key_dtype=np.uint32,
        options=None):
    """
    :arg context: the context the scan kernel is built in. Its first
        device is used to determine the scan type.
    :arg arguments: A string of comma-separated C argument declarations.
        All types used here must be known to PyOpenCL.
        (see :func:`pyopencl.tools.get_or_register_dtype`).
    :arg key_expr: An integer-valued C expression returning the
        key based on which the sort is performed. The array index
        for which the key is to be computed is available as `i`.
        The expression may refer to any of the *arguments*.
    :arg sort_arg_names: A list of argument names whose corresponding
        array arguments will be sorted according to *key_expr*.
    :arg bits_at_a_time: converted to :class:`int` and stored as
        ``self.bits``.
    :arg index_dtype: dtype used for indices, stored as
        ``self.index_dtype``.
    :arg key_dtype: dtype of the key, stored as ``self.key_dtype``.
    :arg options: build options passed on to the scan kernel. Defaults
        to a new empty list.
    """

    # A list literal as the default would be one object shared by every
    # instance through self.options, so create a new list per instance.
    if options is None:
        options = []

    # {{{ arg processing

    from pyopencl.tools import parse_arg_list
    self.arguments = parse_arg_list(arguments)
    del arguments

    self.sort_arg_names = sort_arg_names
    self.bits = int(bits_at_a_time)
    self.index_dtype = np.dtype(index_dtype)
    self.key_dtype = np.dtype(key_dtype)

    self.options = options

    # }}}

    # {{{ kernel creation

    scan_ctype, scan_dtype, scan_t_cdecl = \
            _make_sort_scan_type(context.devices[0], self.bits, self.index_dtype)

    from pyopencl.tools import VectorArg, ScalarArg
    # The scan sees the user's arguments, one "sorted_<name>" output array
    # per sorted argument, and the bit position handled in this pass.
    scan_arguments = (
            list(self.arguments)
            + [VectorArg(arg.dtype, "sorted_" + arg.name)
                for arg in self.arguments
                if arg.name in sort_arg_names]
            + [ScalarArg(np.int32, "base_bit")])

    def get_count_branch(known_bits):
        # Recursively emits a nested C conditional on 'mnr' that selects
        # the 's.c<bits>' member once all self.bits bits are decided.
        if len(known_bits) == self.bits:
            return "s.c%s" % known_bits

        boundary_mnr = known_bits + "1" + (self.bits - len(known_bits) - 1) * "0"

        return ("((mnr < %s) ? %s : %s)" % (
            int(boundary_mnr, 2),
            get_count_branch(known_bits + "0"),
            get_count_branch(known_bits + "1")))

    codegen_args = dict(
            bits=self.bits,
            key_ctype=dtype_to_ctype(self.key_dtype),
            key_expr=key_expr,
            index_ctype=dtype_to_ctype(self.index_dtype),
            index_type_max=np.iinfo(self.index_dtype).max,
            padded_bin=_padded_bin,
            scan_ctype=scan_ctype,
            sort_arg_names=sort_arg_names,
            get_count_branch=get_count_branch,
            )

    preamble = scan_t_cdecl + RADIX_SORT_PREAMBLE_TPL.render(**codegen_args)
    scan_preamble = preamble \
            + RADIX_SORT_SCAN_PREAMBLE_TPL.render(**codegen_args)

    from pyopencl.scan import GenericScanKernel
    self.scan_kernel = GenericScanKernel(
            context, scan_dtype,
            arguments=scan_arguments,
            input_expr="scan_t_from_value(%s, base_bit, i)" % key_expr,
            scan_expr="scan_t_add(a, b, across_seg_boundary)",
            neutral="scan_t_neutral()",
            output_statement=RADIX_SORT_OUTPUT_STMT_TPL.render(**codegen_args),
            preamble=scan_preamble, options=self.options)

    # Stop at the first match: without the break the attribute would end
    # up holding the index of the *last* array argument. It stays unset if
    # there is no array argument at all.
    for i, arg in enumerate(self.arguments):
        if isinstance(arg, VectorArg):
            self.first_array_arg_idx = i
            break

    # }}}
def __init__(
    self,
    context,
    list_names_and_dtypes,
    generate_template,
    arg_decls,
    count_sharing=None,
    devices=None,
    name_prefix="plb_build_list",
    options=None,
    preamble="",
    debug=False,
    complex_kernel=False,
):
    """
    :arg context: A :class:`pyopencl.Context`.
    :arg list_names_and_dtypes: a list of `(name, dtype)` tuples
        indicating the lists to be built.
    :arg generate_template: a snippet of C as described below
    :arg arg_decls: A string of comma-separated C argument declarations.
    :arg count_sharing: A mapping consisting of `(child, mother)`
        indicating that `mother` and `child` will always have the
        same number of indices, and the `APPEND` to `mother`
        will always happen *before* the `APPEND` to the child.
        Defaults to an empty mapping.
    :arg devices: the devices to use. Defaults to ``context.devices``.
    :arg name_prefix: the name prefix to use for the compiled kernels
    :arg options: OpenCL compilation options for kernels using
        *generate_template*. Defaults to a new empty list.
    :arg preamble: stored as ``self.preamble``.
    :arg debug: stored as ``self.debug``.
    :arg complex_kernel: If `True`, prevents vectorization on CPUs.

    *generate_template* may use the following C macros/identifiers:

    * `index_type`: expands to C identifier for the index type used
      for the calculation
    * `USER_ARG_DECL`: expands to the C declarator for `arg_decls`
    * `USER_ARGS`: a list of C argument values corresponding to
      `user_arg_decl`
    * `LIST_ARG_DECL`: expands to a C argument list representing the
      data for the output lists. These are escaped prefixed with
      `"plg_"` so as to not interfere with user-provided names.
    * `LIST_ARGS`: a list of C argument values corresponding to
      `LIST_ARG_DECL`
    * `APPEND_name(entry)`: inserts `entry` into the list `name`.
      *entry* must be a valid C expression of the correct type.

    All argument-list related macros have a trailing comma included
    if they are non-empty.

    *generate_template* must supply a function:

    .. code-block:: c

        void generate(USER_ARG_DECL LIST_ARG_DECL index_type i)
        {
            APPEND_mylist(5);
        }

    Internally, the `kernel_template` is expanded (at least) twice. Once,
    for a 'counting' stage where the size of all the lists is determined,
    and a second time, for a 'generation' stage where the lists are
    actually filled. A `generate` function that has side effects beyond
    calling `append` is therefore ill-formed.
    """
    if devices is None:
        devices = context.devices

    if count_sharing is None:
        count_sharing = {}

    # A list literal as the default would be one object shared by every
    # instance through self.options, so create a new list per instance.
    if options is None:
        options = []

    self.context = context
    self.devices = devices

    self.list_names_and_dtypes = list_names_and_dtypes
    self.generate_template = generate_template

    from pyopencl.tools import parse_arg_list

    self.arg_decls = parse_arg_list(arg_decls)

    self.count_sharing = count_sharing

    self.name_prefix = name_prefix
    self.preamble = preamble
    self.options = options

    self.debug = debug

    self.complex_kernel = complex_kernel
def __init__(
    self,
    context,
    arguments,
    key_expr,
    sort_arg_names,
    bits_at_a_time=2,
    index_dtype=np.int32,
    key_dtype=np.uint32,
    options=None,
):
    """
    :arg context: the context the scan kernel is built in. Its first
        device is used to determine the scan type.
    :arg arguments: A string of comma-separated C argument declarations.
        All types used here must be known to PyOpenCL.
        (see :func:`pyopencl.tools.get_or_register_dtype`).
    :arg key_expr: An integer-valued C expression returning the
        key based on which the sort is performed. The array index
        for which the key is to be computed is available as `i`.
        The expression may refer to any of the *arguments*.
    :arg sort_arg_names: A list of argument names whose corresponding
        array arguments will be sorted according to *key_expr*.
    :arg bits_at_a_time: converted to :class:`int` and stored as
        ``self.bits``.
    :arg index_dtype: dtype used for indices, stored as
        ``self.index_dtype``.
    :arg key_dtype: dtype of the key, stored as ``self.key_dtype``.
    :arg options: build options passed on to the scan kernel. Defaults
        to a new empty list.
    """

    # A list literal as the default would be one object shared by every
    # instance through self.options, so create a new list per instance.
    if options is None:
        options = []

    # {{{ arg processing

    from pyopencl.tools import parse_arg_list

    self.arguments = parse_arg_list(arguments)
    del arguments

    self.sort_arg_names = sort_arg_names
    self.bits = int(bits_at_a_time)
    self.index_dtype = np.dtype(index_dtype)
    self.key_dtype = np.dtype(key_dtype)

    self.options = options

    # }}}

    # {{{ kernel creation

    scan_ctype, scan_dtype, scan_t_cdecl = _make_sort_scan_type(
        context.devices[0], self.bits, self.index_dtype
    )

    from pyopencl.tools import VectorArg, ScalarArg

    # The scan sees the user's arguments, one "sorted_<name>" output array
    # per sorted argument, and the bit position handled in this pass.
    scan_arguments = (
        list(self.arguments)
        + [
            VectorArg(arg.dtype, "sorted_" + arg.name)
            for arg in self.arguments
            if arg.name in sort_arg_names
        ]
        + [ScalarArg(np.int32, "base_bit")]
    )

    def get_count_branch(known_bits):
        # Recursively emits a nested C conditional on 'mnr' that selects
        # the 's.c<bits>' member once all self.bits bits are decided.
        if len(known_bits) == self.bits:
            return "s.c%s" % known_bits

        boundary_mnr = known_bits + "1" + (self.bits - len(known_bits) - 1) * "0"

        return "((mnr < %s) ? %s : %s)" % (
            int(boundary_mnr, 2),
            get_count_branch(known_bits + "0"),
            get_count_branch(known_bits + "1"),
        )

    codegen_args = dict(
        bits=self.bits,
        key_ctype=dtype_to_ctype(self.key_dtype),
        key_expr=key_expr,
        index_ctype=dtype_to_ctype(self.index_dtype),
        index_type_max=np.iinfo(self.index_dtype).max,
        padded_bin=_padded_bin,
        scan_ctype=scan_ctype,
        sort_arg_names=sort_arg_names,
        get_count_branch=get_count_branch,
    )

    preamble = scan_t_cdecl + RADIX_SORT_PREAMBLE_TPL.render(**codegen_args)
    scan_preamble = preamble + RADIX_SORT_SCAN_PREAMBLE_TPL.render(**codegen_args)

    from pyopencl.scan import GenericScanKernel

    self.scan_kernel = GenericScanKernel(
        context,
        scan_dtype,
        arguments=scan_arguments,
        input_expr="scan_t_from_value(%s, base_bit, i)" % key_expr,
        scan_expr="scan_t_add(a, b, across_seg_boundary)",
        neutral="scan_t_neutral()",
        output_statement=RADIX_SORT_OUTPUT_STMT_TPL.render(**codegen_args),
        preamble=scan_preamble,
        options=self.options,
    )

    # Stop at the first match: without the break the attribute would end
    # up holding the index of the *last* array argument. It stays unset if
    # there is no array argument at all.
    for i, arg in enumerate(self.arguments):
        if isinstance(arg, VectorArg):
            self.first_array_arg_idx = i
            break

    # }}}
def __init__(self, context, list_names_and_dtypes, generate_template,
        arg_decls, count_sharing=None, devices=None,
        name_prefix="plb_build_list", options=None, preamble="",
        debug=False, complex_kernel=False,
        eliminate_empty_output_lists=None):
    """
    :arg context: A :class:`pyopencl.Context`.
    :arg list_names_and_dtypes: a list of `(name, dtype)` tuples
        indicating the lists to be built.
    :arg generate_template: a snippet of C as described below
    :arg arg_decls: A string of comma-separated C argument declarations.
    :arg count_sharing: A mapping consisting of `(child, mother)`
        indicating that `mother` and `child` will always have the
        same number of indices, and the `APPEND` to `mother`
        will always happen *before* the `APPEND` to the child.
        Defaults to an empty mapping.
    :arg devices: the devices to use. Defaults to ``context.devices``.
    :arg name_prefix: the name prefix to use for the compiled kernels
    :arg options: OpenCL compilation options for kernels using
        *generate_template*. Defaults to a new empty list.
    :arg preamble: stored as ``self.preamble``.
    :arg debug: stored as ``self.debug``.
    :arg complex_kernel: If `True`, prevents vectorization on CPUs.
    :arg eliminate_empty_output_lists: A Python list of list names
        for which the empty output lists are eliminated. Defaults to a
        new empty list. For backward compatibility, `True` selects every
        list in *list_names_and_dtypes* and `False` selects none.

    :raises ValueError: if *eliminate_empty_output_lists* contains a name
        that does not occur in *list_names_and_dtypes*.

    *generate_template* may use the following C macros/identifiers:

    * `index_type`: expands to C identifier for the index type used
      for the calculation
    * `USER_ARG_DECL`: expands to the C declarator for `arg_decls`
    * `USER_ARGS`: a list of C argument values corresponding to
      `user_arg_decl`
    * `LIST_ARG_DECL`: expands to a C argument list representing the
      data for the output lists. These are escaped prefixed with
      `"plg_"` so as to not interfere with user-provided names.
    * `LIST_ARGS`: a list of C argument values corresponding to
      `LIST_ARG_DECL`
    * `APPEND_name(entry)`: inserts `entry` into the list `name`.
      *entry* must be a valid C expression of the correct type.

    All argument-list related macros have a trailing comma included
    if they are non-empty.

    *generate_template* must supply a function:

    .. code-block:: c

        void generate(USER_ARG_DECL LIST_ARG_DECL index_type i)
        {
            APPEND_mylist(5);
        }

    Internally, the `kernel_template` is expanded (at least) twice. Once,
    for a 'counting' stage where the size of all the lists is determined,
    and a second time, for a 'generation' stage where the lists are
    actually filled. A `generate` function that has side effects beyond
    calling `append` is therefore ill-formed.

    .. versionchanged:: 2018.1

        Change *eliminate_empty_output_lists* argument type from `bool` to
        `list`.
    """
    if devices is None:
        devices = context.devices

    if count_sharing is None:
        count_sharing = {}

    # List literals as defaults would be single objects shared by every
    # instance through the attributes set below, so create new lists.
    if options is None:
        options = []

    if eliminate_empty_output_lists is None:
        eliminate_empty_output_lists = []

    self.context = context
    self.devices = devices

    self.list_names_and_dtypes = list_names_and_dtypes
    self.generate_template = generate_template

    from pyopencl.tools import parse_arg_list, VectorArg
    self.arg_decls = parse_arg_list(arg_decls)

    # To match with the signature of the user-supplied generate(), arguments
    # can't appear to have offsets.
    self.arg_decls_no_offset = [
            VectorArg(arg.dtype, arg.name)
            if isinstance(arg, VectorArg) and arg.with_offset
            else arg
            for arg in self.arg_decls]

    self.count_sharing = count_sharing

    self.name_prefix = name_prefix
    self.preamble = preamble
    self.options = options

    self.debug = debug

    self.complex_kernel = complex_kernel

    known_list_names = [name for name, _ in self.list_names_and_dtypes]

    # Legacy boolean forms of the argument.
    if eliminate_empty_output_lists is True:
        eliminate_empty_output_lists = list(known_list_names)

    if eliminate_empty_output_lists is False:
        eliminate_empty_output_lists = []

    self.eliminate_empty_output_lists = eliminate_empty_output_lists
    for list_name in self.eliminate_empty_output_lists:
        if list_name not in known_list_names:
            raise ValueError(
                "invalid list name '%s' in eliminate_empty_output_lists"
                % list_name)