示例#1
0
def get_linear_combination_kernel(summand_descriptors, dtype_z):
    """Build a kernel computing ``z[i] = a0*x0[i] + a1*x1[i] + ...``.

    Not ported yet: the function raises :exc:`NotImplementedError`
    unconditionally. Everything after the ``raise`` is unreachable and is
    kept only as a reference for the port.

    :arg summand_descriptors: iterable of
        ``(is_gpu_scalar, scalar_dtype, vector_dtype)`` triples, one per
        summand.
    :arg dtype_z: dtype of the output vector ``z``.
    :raises NotImplementedError: always.
    """
    # TODO: Port this!
    raise NotImplementedError

    # ---- unreachable reference implementation below ----
    # NOTE(review): the code below relies on texture references
    # (``get_texref``, ``fp_tex1Dfetch``) and ``func.prepare``; it looks
    # like a leftover from a CUDA-based implementation -- confirm before
    # porting.

    from pyopencl.tools import dtype_to_ctype
    from pyopencl.elementwise import \
            VectorArg, ScalarArg, get_elwise_module

    args = []        # kernel argument declarations, in call order
    preamble = []    # texture declarations emitted before the kernel
    loop_prep = []   # statements run before the element loop
    summands = []    # one "a<i>*x<i>[i]" term per summand
    tex_names = []   # names of the textures declared in the preamble

    for i, (is_gpu_scalar, scalar_dtype, vector_dtype) in \
            enumerate(summand_descriptors):
        if is_gpu_scalar:
            # The coefficient is not a kernel argument here: it is fetched
            # from texture tex_a<i> at index 0 before the loop.
            preamble.append(
                "texture <%s, 1, cudaReadModeElementType> tex_a%d;" %
                (dtype_to_ctype(scalar_dtype, with_fp_tex_hack=True), i))
            args.append(VectorArg(vector_dtype, "x%d" % i))
            tex_names.append("tex_a%d" % i)
            loop_prep.append("%s a%d = fp_tex1Dfetch(tex_a%d, 0)" %
                             (dtype_to_ctype(scalar_dtype), i, i))
        else:
            # The coefficient is passed as a plain scalar argument a<i>.
            args.append(ScalarArg(scalar_dtype, "a%d" % i))
            args.append(VectorArg(vector_dtype, "x%d" % i))

        summands.append("a%d*x%d[i]" % (i, i))

    # Output vector and element count come last in the argument list.
    args.append(VectorArg(dtype_z, "z"))
    args.append(ScalarArg(np.uintp, "n"))

    mod = get_elwise_module(args,
                            "z[i] = " + " + ".join(summands),
                            "linear_combination",
                            preamble="\n".join(preamble),
                            loop_prep=";\n".join(loop_prep))

    func = mod.get_function("linear_combination")
    tex_src = [mod.get_texref(tn) for tn in tex_names]
    func.prepare("".join(arg.struct_char for arg in args), (1, 1, 1),
                 texrefs=tex_src)

    # The texture references are returned alongside the function.
    return func, tex_src
示例#2
0
def get_array_scalar_binop_kernel(context, operator, dtype_res, dtype_a,
                                  dtype_b):
    """Return an elementwise kernel computing ``out[i] = a[i] <operator> b``.

    :arg context: passed through to ``get_elwise_kernel``.
    :arg operator: operator text, inserted via ``%s`` between ``a[i]`` and
        ``b`` in the generated statement.
    :arg dtype_res: dtype of the output vector ``out``.
    :arg dtype_a: dtype of the input vector ``a``.
    :arg dtype_b: dtype of the scalar ``b``.
    :returns: whatever ``get_elwise_kernel`` returns for the kernel named
        ``scalar_binop_kernel``.
    """
    kernel_args = [
        VectorArg(dtype_res, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        ScalarArg(dtype_b, "b"),
    ]
    statement = "out[i] = a[i] %s b" % operator

    return get_elwise_kernel(
        context, kernel_args, statement, name="scalar_binop_kernel")
示例#3
0
    def get_kernels(self, key_dtype, value_dtype, starts_dtype):
        """Build the three kernels used for key/value sorting.

        :arg key_dtype: dtype of the ``keys`` array (also used for the
            scalar ``nkeys``).
        :arg value_dtype: dtype of the ``values`` array.
        :arg starts_dtype: dtype of the group-start array.
        :returns: a ``_KernelInfo`` with

            * *by_target_sorter*: a ``RadixSort`` over ``values`` and
              ``keys`` that sorts both by ``keys[i]``,
            * *start_finder*: an elementwise kernel that writes ``i`` to
              ``key_group_starts[key]`` wherever the key at ``i`` differs
              from its predecessor (or ``i == 0``),
            * *bound_propagation_scan*: a ``min`` scan that reads and
              rewrites ``starts[nkeys-i]``.
        """
        from pyopencl.algorithm import RadixSort
        from pyopencl.elementwise import ElementwiseTemplate
        from pyopencl.scan import GenericScanKernel
        from pyopencl.tools import VectorArg, ScalarArg

        # {{{ sorter

        sorter_args = [
            VectorArg(value_dtype, "values"),
            VectorArg(key_dtype, "keys"),
        ]
        by_target_sorter = RadixSort(
                self.context, sorter_args,
                key_expr="keys[i]",
                sort_arg_names=["values", "keys"])

        # }}}

        # {{{ start finder

        start_finder_template = ElementwiseTemplate(
                arguments="""//CL//
                starts_t *key_group_starts,
                key_t *keys_sorted_by_key,
                """,

                operation=r"""//CL//
                key_t my_key = keys_sorted_by_key[i];

                if (i == 0 || my_key != keys_sorted_by_key[i-1])
                    key_group_starts[my_key] = i;
                """,
                name="find_starts")

        # NOTE(review): key_t is aliased to starts_dtype, not key_dtype,
        # although the sorter above treats the keys as key_dtype -- confirm
        # this is intended when the two dtypes differ.
        start_finder = start_finder_template.build(
                self.context,
                type_aliases=(
                    ("key_t", starts_dtype),
                    ("starts_t", starts_dtype),
                    ),
                var_values=())

        # }}}

        # {{{ bound propagation

        # Neutral element of min(): the largest value starts_dtype can hold.
        scan_neutral = _make_cl_int_literal(
                np.iinfo(starts_dtype).max, starts_dtype)

        scan_args = [
            VectorArg(starts_dtype, "starts"),
            # starts has length n+1
            ScalarArg(key_dtype, "nkeys"),
        ]
        bound_propagation_scan = GenericScanKernel(
                self.context, starts_dtype,
                arguments=scan_args,
                input_expr="starts[nkeys-i]",
                scan_expr="min(a, b)",
                neutral=scan_neutral,
                output_statement="starts[nkeys-i] = item;")

        # }}}

        return _KernelInfo(
                by_target_sorter=by_target_sorter,
                start_finder=start_finder,
                bound_propagation_scan=bound_propagation_scan)
示例#4
0
def extract_extra_args_types_values(extra_args):
    """Split ``(name, value)`` pairs into argument declarations and values.

    :arg extra_args: iterable of ``(name, value)`` pairs. Each value must
        be either a ``cl.array.Array`` (declared as a ``VectorArg`` with
        ``with_offset=False``) or a numpy scalar, i.e. an instance of
        ``np.generic`` (declared as a ``ScalarArg``).
    :returns: a pair ``(types, values)``: a tuple of argument declarations
        and a list of the corresponding values, both in input order.
    :raises RuntimeError: if a value is neither an array nor a numpy scalar.
    """
    from pyopencl.tools import VectorArg, ScalarArg

    extra_args_types = []
    extra_args_values = []
    for name, val in extra_args:
        if isinstance(val, cl.array.Array):
            arg_type = VectorArg(val.dtype, name, with_offset=False)
        elif isinstance(val, np.generic):
            arg_type = ScalarArg(val.dtype, name)
        else:
            # Format with %s: *name* is a string, so the former '%d' raised
            # TypeError here and masked the intended RuntimeError. The
            # one-element tuple keeps tuple-valued names from being unpacked.
            raise RuntimeError("argument '%s' not understood" % (name,))

        extra_args_types.append(arg_type)
        extra_args_values.append(val)

    return tuple(extra_args_types), extra_args_values
示例#5
0
def get_bessel_kernel(context,
                      which_func,
                      out_dtype=np.float64,
                      order_dtype=np.int32,
                      x_dtype=np.float64):
    """Return an elementwise kernel evaluating ``bessel_<which_func>n``.

    The generated statement is ``z[i] = bessel_<which_func>n(ord_n, x[i])``
    and the preamble includes ``pyopencl-bessel-<which_func>.cl``.

    :arg context: passed through to ``get_elwise_kernel``.
    :arg which_func: text spliced into the function, kernel and include
        file names.
    :arg out_dtype: dtype of the output vector ``z``.
    :arg order_dtype: dtype of the scalar order ``ord_n``.
    :arg x_dtype: dtype of the input vector ``x``.
    """
    kernel_args = [
        VectorArg(out_dtype, "z", with_offset=True),
        ScalarArg(order_dtype, "ord_n"),
        VectorArg(x_dtype, "x", with_offset=True),
    ]

    statement = "z[i] = bessel_%sn(ord_n, x[i])" % which_func
    kernel_name = "bessel_%sn_kernel" % which_func
    kernel_preamble = """
        #pragma OPENCL EXTENSION cl_khr_fp64: enable
        #define PYOPENCL_DEFINE_CDOUBLE
        #include <pyopencl-bessel-%s.cl>
        """ % which_func

    return get_elwise_kernel(
        context, kernel_args, statement,
        name=kernel_name,
        preamble=kernel_preamble)
示例#6
0
def get_elwise_kernel_and_types(context, arguments, operation,
        name="elwise_kernel", options=[], preamble="", **kwargs):
    """Build an elementwise kernel and return it with its argument list.

    :arg context: passed through to ``get_elwise_program``.
    :arg arguments: either a string of comma-separated C argument
        declarations (each piece is parsed with
        ``pyopencl.tools.parse_c_arg``) or a sequence of already-parsed
        argument objects. A sequence is copied, never modified.
    :arg operation: passed through to ``get_elwise_program``.
    :arg name: kernel name; also the attribute looked up on the built
        program.
    :arg options: passed through to ``get_elwise_program``. The default
        list is only forwarded, not modified here.
    :arg preamble: user preamble; automatically generated pragmas and
        includes are placed in front of it.
    :arg kwargs: passed through to ``get_elwise_program``.
    :returns: ``(kernel, parsed_args)`` where *parsed_args* ends with the
        size argument ``n`` that is appended here.
    """
    if isinstance(arguments, str):
        from pyopencl.tools import parse_c_arg
        parsed_args = [parse_c_arg(arg) for arg in arguments.split(",")]
    else:
        # Work on a copy: "n" is appended below, and appending to the
        # caller's own list would grow it by one entry on every call.
        parsed_args = list(arguments)

    needs_double = False
    needs_complex = False
    for arg in parsed_args:
        if arg.dtype in [np.float64, np.complex128]:
            needs_double = True
        if arg.dtype.kind == 'c':
            needs_complex = True

    # Pragmas come first, then includes, then the caller's preamble.
    generated = []
    if needs_double:
        generated.append(
                "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
                "#define PYOPENCL_DEFINE_CDOUBLE\n")
    if needs_complex:
        generated.append("#include <pyopencl-complex.h>\n")

    if generated:
        preamble = "\n".join(generated) + "\n" + preamble

    parsed_args.append(ScalarArg(np.uintp, "n"))

    prg = get_elwise_program(
        context, parsed_args, operation,
        name=name, options=options, preamble=preamble, **kwargs)

    # Scalars carry their dtype; every other argument is marked with None.
    scalar_arg_dtypes = [
        arg.dtype if isinstance(arg, ScalarArg) else None
        for arg in parsed_args]

    kernel = getattr(prg, name)
    kernel.set_scalar_arg_dtypes(scalar_arg_dtypes)

    return kernel, parsed_args
示例#7
0
def get_take_put_kernel(context, dtype, idx_dtype, with_offsets, vec_count=1):
    """Return an elementwise kernel copying ``src<k>[...]`` to ``dest<k>[...]``.

    For each work item ``i`` the kernel reads ``src_idx = gmem_src_idx[i]``
    and ``dest_idx = gmem_dest_idx[i]`` and then, for every vector ``k`` in
    ``range(vec_count)``, executes ``dest<k>[dest_idx] = src<k>[src_idx];``
    (with ``+offset<k>`` added to the source index if *with_offsets*).

    :arg context: passed to ``get_elwise_kernel``; its first device is
        passed to ``dtype_to_c_struct`` to build the preamble.
    :arg dtype: dtype of the source and destination vectors.
    :arg idx_dtype: dtype of the index vectors and of the offsets.
    :arg with_offsets: if true, one scalar ``offset<k>`` per vector is
        appended to the argument list and added to the source index.
    :arg vec_count: number of source/destination vector pairs.
    """
    ctype_names = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_numbers = list(range(vec_count))

    # {{{ argument list: dests, index vectors, sources, optional offsets

    dest_args = [VectorArg(dtype, "dest%d" % k) for k in vec_numbers]
    index_args = [
        VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True),
        VectorArg(idx_dtype, "gmem_src_idx", with_offset=True),
    ]
    src_args = [
        VectorArg(dtype, "src%d" % k, with_offset=True)
        for k in vec_numbers]
    if with_offsets:
        offset_args = [
            ScalarArg(idx_dtype, "offset%d" % k) for k in vec_numbers]
    else:
        offset_args = []

    kernel_args = dest_args + index_args + src_args + offset_args

    # }}}

    # {{{ kernel body

    if with_offsets:
        copy_insns = [
            "dest%d[dest_idx] = src%d[src_idx+offset%d];" % (k, k, k)
            for k in vec_numbers]
    else:
        copy_insns = [
            "dest%d[dest_idx] = src%d[src_idx];" % (k, k)
            for k in vec_numbers]

    index_loads = (
        "%(idx_tp)s src_idx = gmem_src_idx[i];\n"
        "%(idx_tp)s dest_idx = gmem_dest_idx[i];\n" % ctype_names)
    body = index_loads + "\n".join(copy_insns)

    # }}}

    struct_preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(context, kernel_args, body,
            preamble=struct_preamble,
            name="take_put")
示例#8
0
    def render_argument_list(self, *arg_lists):
        """Turn one or more argument lists into a flat list of arguments.

        :arg arg_lists: each entry is either

            * a string, which is rendered as a text template with
              ``self.var_dict``, has matches of ``self._C_COMMENT_FINDER``
              removed and newlines replaced by spaces, and is then split at
              commas, or
            * an iterable whose items are C declaration strings,
              ``Argument`` instances, or ``(type, name)`` tuples.

        :returns: a list with one parsed argument per non-empty item:
            declaration strings go through ``parse_c_arg_backend`` and
            ``self.render_arg``, ``Argument`` instances are kept as they
            are, and tuples become ``ScalarArg(self.parse_type(type), name)``.
        :raises TypeError: if an item is of none of the supported kinds.
        """
        all_args = []

        for arg_list in arg_lists:
            if isinstance(arg_list, str):
                arg_list = str(
                    self.template.get_text_template(arg_list).render(
                        self.var_dict))
                arg_list = self._C_COMMENT_FINDER.sub("", arg_list)
                arg_list = arg_list.replace("\n", " ")

                all_args.extend(arg_list.split(","))
            else:
                all_args.extend(arg_list)

        from pyopencl.compyte.dtypes import parse_c_arg_backend
        parsed_args = []
        for arg in all_args:
            if isinstance(arg, str):
                arg = arg.strip()
                if not arg:
                    # Empty pieces come from e.g. a trailing comma.
                    continue

                ph = parse_c_arg_backend(arg,
                                         _ScalarArgPlaceholder,
                                         _VectorArgPlaceholder,
                                         name_to_dtype=lambda x: x)
                parsed_arg = self.render_arg(ph)

            elif isinstance(arg, Argument):
                parsed_arg = arg
            elif isinstance(arg, tuple):
                parsed_arg = ScalarArg(self.parse_type(arg[0]), arg[1])
            else:
                # Without this branch an unsupported item either hit an
                # unbound 'parsed_arg' or silently re-appended the
                # previous item's result.
                raise TypeError(
                    "unexpected argument type: %s" % type(arg).__name__)

            parsed_args.append(parsed_arg)

        return parsed_args
    def __init__(self,
                 context,
                 arguments,
                 key_expr,
                 sort_arg_names,
                 bits_at_a_time=2,
                 index_dtype=np.int32,
                 key_dtype=np.uint32,
                 options=[]):
        """
        :arg context: the context the scan kernel is built for; its first
            device is used to construct the scan data type.
        :arg arguments: A string of comma-separated C argument declarations
            (the value is handed to :func:`pyopencl.tools.parse_arg_list`).
            All types used here must be known to PyOpenCL.
            (see :func:`pyopencl.tools.get_or_register_dtype`).
        :arg key_expr: An integer-valued C expression returning the
            key based on which the sort is performed. The array index
            for which the key is to be computed is available as `i`.
            The expression may refer to any of the *arguments*.
        :arg sort_arg_names: A list of argument names whose corresponding
            array arguments will be sorted according to *key_expr*.
        :arg bits_at_a_time: converted with ``int`` and stored as
            ``self.bits``; determines the depth of the generated
            count-selection expression.
        :arg index_dtype: stored as ``self.index_dtype`` (a
            :class:`numpy.dtype`).
        :arg key_dtype: stored as ``self.key_dtype`` (a
            :class:`numpy.dtype`).
        :arg options: stored as ``self.options`` and passed on to the scan
            kernel.
        """

        from pyopencl.scan import GenericScanKernel
        from pyopencl.tools import VectorArg, ScalarArg, parse_arg_list

        # {{{ arg processing

        self.options = options
        self.sort_arg_names = sort_arg_names
        self.key_dtype = np.dtype(key_dtype)
        self.index_dtype = np.dtype(index_dtype)
        self.bits = nbits = int(bits_at_a_time)

        self.arguments = parsed_args = parse_arg_list(arguments)
        del arguments

        # }}}

        # {{{ kernel creation

        scan_ctype, scan_dtype, scan_t_cdecl = _make_sort_scan_type(
                context.devices[0], nbits, self.index_dtype)

        # The scan sees the user's arguments, one "sorted_<name>" output
        # per sorted array, and the scalar "base_bit".
        sorted_outputs = [
            VectorArg(arg.dtype, "sorted_" + arg.name)
            for arg in parsed_args
            if arg.name in sort_arg_names]
        scan_arguments = (
            list(parsed_args)
            + sorted_outputs
            + [ScalarArg(np.int32, "base_bit")])

        def get_count_branch(known_bits):
            # Builds a nested C conditional on 'mnr' that selects the
            # counter field "s.c<bits>" for a bin number.
            if len(known_bits) == nbits:
                return "s.c%s" % known_bits

            undecided = nbits - len(known_bits) - 1
            boundary_mnr = int(known_bits + "1" + "0" * undecided, 2)
            below = get_count_branch(known_bits + "0")
            at_or_above = get_count_branch(known_bits + "1")

            return "((mnr < %s) ? %s : %s)" % (
                boundary_mnr, below, at_or_above)

        codegen_args = {
            "bits": nbits,
            "key_ctype": dtype_to_ctype(self.key_dtype),
            "key_expr": key_expr,
            "index_ctype": dtype_to_ctype(self.index_dtype),
            "index_type_max": np.iinfo(self.index_dtype).max,
            "padded_bin": _padded_bin,
            "scan_ctype": scan_ctype,
            "sort_arg_names": sort_arg_names,
            "get_count_branch": get_count_branch,
        }

        preamble = scan_t_cdecl + RADIX_SORT_PREAMBLE_TPL.render(
            **codegen_args)
        scan_preamble = preamble + RADIX_SORT_SCAN_PREAMBLE_TPL.render(
            **codegen_args)
        output_statement = RADIX_SORT_OUTPUT_STMT_TPL.render(**codegen_args)

        self.scan_kernel = GenericScanKernel(
            context,
            scan_dtype,
            arguments=scan_arguments,
            input_expr="scan_t_from_value(%s, base_bit, i)" % key_expr,
            scan_expr="scan_t_add(a, b, across_seg_boundary)",
            neutral="scan_t_neutral()",
            output_statement=output_statement,
            preamble=scan_preamble,
            options=self.options)

        # NOTE(review): there is no early exit, so despite its name the
        # attribute ends up holding the index of the *last* VectorArg, and
        # it stays unset if there is none -- confirm this is intended.
        for position, arg in enumerate(parsed_args):
            if not isinstance(arg, VectorArg):
                continue
            self.first_array_arg_idx = position
示例#10
0
def get_elwise_kernel_and_types(context,
                                arguments,
                                operation,
                                name="elwise_kernel",
                                options=[],
                                preamble="",
                                use_range=False,
                                **kwargs):
    """Build an elementwise kernel and return it with its argument list.

    :arg context: passed through to ``get_elwise_program``.
    :arg arguments: argument declarations, parsed with
        ``pyopencl.tools.parse_arg_list(arguments, with_offset=True)``.
    :arg operation: passed through to ``get_elwise_program``.
    :arg name: kernel name; also the attribute looked up on the built
        program.
    :arg options: passed through to ``get_elwise_program``.
    :arg preamble: user preamble; automatically generated pragmas and
        includes are placed in front of it.
    :arg use_range: if true, the scalars ``start``, ``stop`` and ``step``
        are appended to the arguments, otherwise the single scalar ``n``.
    :arg kwargs: ``auto_preamble`` (default ``True``) switches the
        automatic pragma/include generation; ``loop_prep`` is appended to
        the generated offset-adjustment code; everything else is passed
        through to ``get_elwise_program``.
    :returns: ``(kernel, parsed_args)``.
    """

    from pyopencl.tools import (
        parse_arg_list,
        get_arg_offset_adjuster_code,
        get_arg_list_scalar_arg_dtypes)

    parsed_args = parse_arg_list(arguments, with_offset=True)

    # {{{ automatic preamble

    needs_double = False
    needs_complex = False

    if kwargs.pop("auto_preamble", True):
        for arg in parsed_args:
            if arg.dtype in [np.float64, np.complex128]:
                needs_double = True
            if arg.dtype.kind == 'c':
                needs_complex = True

    # Pragmas come first, then includes, then the caller's preamble.
    generated = []
    if needs_double:
        generated.append("""
                        #if __OPENCL_C_VERSION__ < 120
                        #pragma OPENCL EXTENSION cl_khr_fp64: enable
                        #endif
                        #define PYOPENCL_DEFINE_CDOUBLE
                        """)
    if needs_complex:
        generated.append("#include <pyopencl-complex.h>\n")

    if generated:
        preamble = "\n".join(generated) + "\n" + preamble

    # }}}

    # {{{ size arguments

    if use_range:
        size_arg_names = ("start", "stop", "step")
    else:
        size_arg_names = ("n",)

    for size_arg_name in size_arg_names:
        parsed_args.append(ScalarArg(np.intp, size_arg_name))

    # }}}

    # Offset adjustment runs before any caller-supplied loop preparation.
    caller_loop_prep = kwargs.pop("loop_prep", "")
    loop_prep = get_arg_offset_adjuster_code(parsed_args) + caller_loop_prep

    prg = get_elwise_program(
        context, parsed_args, operation,
        name=name,
        options=options,
        preamble=preamble,
        use_range=use_range,
        loop_prep=loop_prep,
        **kwargs)

    kernel = getattr(prg, name)
    kernel.set_scalar_arg_dtypes(get_arg_list_scalar_arg_dtypes(parsed_args))

    return kernel, parsed_args
示例#11
0
文件: beam.py 项目: bellaz89/pyFEL
    def initialize(cls):
        '''
            Compile the OpenCL program and build the particle sort kernels.

            Sets three class attributes:

            * ``program``: the built ``cl.Program`` made from ``cls.KERNEL``
            * ``longitudinal_sort_kernel``: a ``RadixSort`` keyed on
              ``floor(theta[i]*inv_slice_len)``
            * ``longitudinal_traverse_sort_kernel``: a ``RadixSort`` keyed
              on ``sort_fun(...)``, a C helper defined inside this method
        '''
        # NOTE(review): F is defined outside this block; presumably it
        # substitutes FLOAT_TYPE in the source text -- confirm.
        cls.program = cl.Program(cl_ctx, F(cls.KERNEL)).build()
        # All six particle arrays are reordered together by the key.
        cls.longitudinal_sort_kernel = RadixSort(cl_ctx,
                                                 [VectorArg(cl_ftype, "x"), 
                                                  VectorArg(cl_ftype, "px"),
                                                  VectorArg(cl_ftype, "y"),
                                                  VectorArg(cl_ftype, "py"),
                                                  VectorArg(cl_ftype, "theta"),
                                                  VectorArg(cl_ftype, "gamma"),
                                                  ScalarArg(cl_ftype, "inv_slice_len")],
                                                 key_expr="(int) floor(theta[i]*inv_slice_len)",
                                                 sort_arg_names=["x", "px", "y", "py", "theta", "gamma"],
                                                 key_dtype=np.int32)

        class LongitudinalTraverseScanKernel(GenericScanKernel):
            '''
                Scan kernel whose preamble is prefixed with the C helper
                ``sort_fun`` used by the longitudinal/traverse sort key.
            '''
            def __init__(self, *argl, **argd):
                '''
                    Prepend ``sort_fun`` to ``argd['preamble']`` and forward
                    everything else unchanged to the base class.
                '''

                # NOTE(review): xbin/ybin are computed as
                # floor(xnorm * inv_traverse_len) although they are then
                # range-checked against 'bins' -- confirm the scale factor.
                sort_fun = '''
                            int sort_fun(FLOAT_TYPE x, 
                                         FLOAT_TYPE y, 
                                         FLOAT_TYPE theta, 
                                         FLOAT_TYPE inv_slice_len, 
                                         FLOAT_TYPE inv_traverse_len,
                                         int bins) {
                                         
                                         FLOAT_TYPE xnorm = 0.5 + (inv_traverse_len*x);
                                         FLOAT_TYPE ynorm = 0.5 + (inv_traverse_len*y);
                                         int xbin = (int) floor(xnorm * inv_traverse_len);
                                         int ybin = (int) floor(ynorm * inv_traverse_len);
                                         int zbin = (int) floor(theta*inv_slice_len);

                                         if ((xbin < 0) || (xbin >= bins) || (ybin < 0) || (ybin >= bins)) {
                                            xbin = 0;
                                            ybin = 0;

                                         }

                                         return xbin+bins*(ybin+bins*zbin);
                            }
                           '''
                
                # Copy so the caller's keyword dict is left untouched.
                new_argd = dict(argd)
                # NOTE(review): raises KeyError if no 'preamble' keyword was
                # passed -- confirm callers always supply one.
                new_argd['preamble'] = F(sort_fun + new_argd['preamble'])
                super().__init__(*argl, **new_argd)
        
        # NOTE(review): 'scan_kernel' is passed as a keyword argument here;
        # confirm the RadixSort in use accepts it.
        cls.longitudinal_traverse_sort_kernel = RadixSort(cl_ctx,
                                                          [VectorArg(cl_ftype, "x"), 
                                                           VectorArg(cl_ftype, "px"),
                                                           VectorArg(cl_ftype, "y"),
                                                           VectorArg(cl_ftype, "py"),
                                                           VectorArg(cl_ftype, "theta"),
                                                           VectorArg(cl_ftype, "gamma"),
                                                           ScalarArg(cl_ftype, "inv_slice_len"),
                                                           ScalarArg(cl_ftype, "inv_traverse_len"),
                                                           ScalarArg(np.int32, "bins")],
                                                           key_expr="sort_fun(x[i],y[i],theta[i], inv_slice_len, inv_traverse_len, bins)",
                                                           sort_arg_names=["x", "px", "y", "py", "theta", "gamma"],
                                                           scan_kernel = LongitudinalTraverseScanKernel,
                                                           key_dtype=np.int32)
示例#12
0
    def get_kernel_info(self, dimensions, particle_id_dtype, box_id_dtype,
            coord_dtype, box_level_dtype, max_levels,
            sources_are_targets, sources_have_extent, targets_have_extent,
            stick_out_factor):
        """Build the kernels used for traversal construction.

        All keyword-like parameters are made available to the code
        templates under their own names. In addition:

        :arg box_id_dtype: dtype of every generated list and of the box-id
            vector arguments.
        :arg box_level_dtype: dtype bound to ``box_level_t`` for the
            level-start extractor.
        :arg sources_are_targets: if false, an extra ``target_boxes`` list
            is built alongside the source/parent lists.
        :arg sources_have_extent: see *targets_have_extent*.
        :arg targets_have_extent: if either flag is true, ``sep_smaller``
            and ``sep_bigger`` additionally build ``sep_close_smaller`` and
            ``sep_close_bigger``.
        :returns: a ``_KernelInfo`` with one ``<list>_builder`` entry per
            list, plus ``sources_parents_and_targets_builder`` and
            ``level_start_box_nrs_extractor``.
        """

        logging.info("building traversal build kernels")

        debug = False

        from pyopencl.tools import dtype_to_ctype
        from boxtree.tree import box_flags_enum
        from pyopencl.algorithm import ListOfListsBuilder
        from pyopencl.tools import VectorArg, ScalarArg

        render_vars = {
            "dimensions": dimensions,
            "dtype_to_ctype": dtype_to_ctype,
            "particle_id_dtype": particle_id_dtype,
            "box_id_dtype": box_id_dtype,
            "box_flags_enum": box_flags_enum,
            "coord_dtype": coord_dtype,
            "vec_types": cl.array.vec.types,
            "max_levels": max_levels,
            "AXIS_NAMES": AXIS_NAMES,
            "debug": debug,
            "sources_are_targets": sources_are_targets,
            "sources_have_extent": sources_have_extent,
            "targets_have_extent": targets_have_extent,
            "stick_out_factor": stick_out_factor,
        }

        def render_source(template_text):
            # Render one code template with the shared variables.
            rendered = Template(
                    template_text,
                    strict_undefined=True).render(**render_vars)
            return str(rendered)

        def box_id_vec(arg_name):
            # Vector argument holding box ids.
            return VectorArg(box_id_dtype, arg_name)

        kernels = {}

        # {{{ source boxes, their parents, target boxes

        spt_source = render_source(
                TRAVERSAL_PREAMBLE_TEMPLATE
                + SOURCES_PARENTS_AND_TARGETS_TEMPLATE)

        spt_lists = [
            ("source_parent_boxes", box_id_dtype),
            ("source_boxes", box_id_dtype),
            ("target_or_target_parent_boxes", box_id_dtype),
        ]
        if not sources_are_targets:
            spt_lists.append(("target_boxes", box_id_dtype))

        kernels["sources_parents_and_targets_builder"] = ListOfListsBuilder(
                self.context,
                spt_lists,
                spt_source,
                arg_decls=[
                    VectorArg(box_flags_enum.dtype, "box_flags"),
                    ],
                debug=debug,
                name_prefix="sources_parents_and_targets")

        kernels["level_start_box_nrs_extractor"] = \
                LEVEL_START_BOX_NR_EXTRACTOR_TEMPLATE.build(
                    self.context,
                    type_aliases=(
                        ("box_id_t", box_id_dtype),
                        ("box_level_t", box_level_dtype),
                        ),
                    )

        # }}}

        # {{{ build list N builders

        # NOTE(review): box_levels is declared as np.uint8 although
        # box_level_dtype is available as a parameter -- confirm.
        base_args = [
            VectorArg(coord_dtype, "box_centers"),
            ScalarArg(coord_dtype, "root_extent"),
            VectorArg(np.uint8, "box_levels"),
            ScalarArg(box_id_dtype, "aligned_nboxes"),
            box_id_vec("box_child_ids"),
            VectorArg(box_flags_enum.dtype, "box_flags"),
        ]

        # The "close" companion lists are only built with extents.
        with_extent = sources_have_extent or targets_have_extent
        close_smaller_lists = ["sep_close_smaller"] if with_extent else []
        close_bigger_lists = ["sep_close_bigger"] if with_extent else []

        # (list name, template, extra arguments, extra list names)
        list_specs = [
            ("colleagues", COLLEAGUES_TEMPLATE, [], []),
            ("neighbor_source_boxes", NEIGBHOR_SOURCE_BOXES_TEMPLATE,
                [
                    box_id_vec("target_boxes"),
                ],
                []),
            ("sep_siblings", SEP_SIBLINGS_TEMPLATE,
                [
                    box_id_vec("target_or_target_parent_boxes"),
                    box_id_vec("box_parent_ids"),
                    box_id_vec("colleagues_starts"),
                    box_id_vec("colleagues_list"),
                ],
                []),
            ("sep_smaller", SEP_SMALLER_TEMPLATE,
                [
                    box_id_vec("target_boxes"),
                    box_id_vec("colleagues_starts"),
                    box_id_vec("colleagues_list"),
                ],
                close_smaller_lists),
            ("sep_bigger", SEP_BIGGER_TEMPLATE,
                [
                    box_id_vec("target_or_target_parent_boxes"),
                    box_id_vec("box_parent_ids"),
                    box_id_vec("colleagues_starts"),
                    box_id_vec("colleagues_list"),
                ],
                close_bigger_lists),
        ]

        for list_name, template, extra_args, extra_lists in list_specs:
            list_source = render_source(
                    TRAVERSAL_PREAMBLE_TEMPLATE
                    + HELPER_FUNCTION_TEMPLATE
                    + template)

            lists_and_dtypes = [(list_name, box_id_dtype)] + [
                (extra_list_name, box_id_dtype)
                for extra_list_name in extra_lists]

            kernels[list_name+"_builder"] = ListOfListsBuilder(
                    self.context,
                    lists_and_dtypes,
                    list_source,
                    arg_decls=base_args + extra_args,
                    debug=debug, name_prefix=list_name,
                    complex_kernel=True)

        # }}}

        logging.info("traversal build kernels built")

        return _KernelInfo(**kernels)