def __init__(self, context, arguments, key_expr, sort_arg_names,
             bits_at_a_time=2, index_dtype=np.int32, key_dtype=np.uint32,
             options=[]):
        """Generate and store the scan kernel that performs one radix pass.

        :arg context: The context the scan kernel is built for. Its first
            device (``context.devices[0]``) is used to create the scan type.
        :arg arguments: Argument declarations, handed to
            :func:`pyopencl.tools.parse_arg_list`. All types used here must
            be known to PyOpenCL
            (see :func:`pyopencl.tools.get_or_register_dtype`).
        :arg key_expr: An integer-valued C expression returning the
            key based on which the sort is performed. The array index
            for which the key is to be computed is available as `i`.
            The expression may refer to any of the *arguments*.
        :arg sort_arg_names: A list of argument names whose corresponding
            array arguments will be sorted according to *key_expr*.
        :arg bits_at_a_time: Converted with :func:`int` and stored as
            ``self.bits``; it sizes the scan type and the generated
            count-selection expression.
        :arg index_dtype: Stored as ``numpy.dtype(index_dtype)``.
        :arg key_dtype: Stored as ``numpy.dtype(key_dtype)``.
        :arg options: Stored unchanged and forwarded to the scan kernel
            as its *options*.
        """

        from pyopencl.tools import ScalarArg, VectorArg, parse_arg_list

        # {{{ arg processing

        self.arguments = parse_arg_list(arguments)
        # From here on only the parsed form may be used.
        del arguments

        self.sort_arg_names = sort_arg_names
        self.bits = int(bits_at_a_time)
        self.index_dtype = np.dtype(index_dtype)
        self.key_dtype = np.dtype(key_dtype)
        self.options = options

        # }}}

        # {{{ kernel creation

        scan_ctype, scan_dtype, scan_t_cdecl = _make_sort_scan_type(
            context.devices[0], self.bits, self.index_dtype)

        # The scan kernel sees every user argument, one "sorted_<name>"
        # output array per sorted argument, and the scalar "base_bit".
        scan_arguments = list(self.arguments)
        scan_arguments.extend([
            VectorArg(user_arg.dtype, "sorted_" + user_arg.name)
            for user_arg in self.arguments
            if user_arg.name in sort_arg_names
        ])
        scan_arguments.append(ScalarArg(np.int32, "base_bit"))

        def get_count_branch(known_bits):
            # Recursively emits a nested C conditional on `mnr` that ends
            # in the field "s.c<bits>" once all self.bits bits are fixed.
            bits_left = self.bits - len(known_bits)
            if bits_left == 0:
                return "s.c%s" % known_bits

            # Smallest value whose next undecided bit is set.
            boundary_mnr = int(known_bits + "1" + "0" * (bits_left - 1), 2)
            lower_branch = get_count_branch(known_bits + "0")
            upper_branch = get_count_branch(known_bits + "1")
            return "((mnr < %s) ? %s : %s)" % (
                boundary_mnr, lower_branch, upper_branch)

        codegen_args = {
            "bits": self.bits,
            "key_ctype": dtype_to_ctype(self.key_dtype),
            "key_expr": key_expr,
            "index_ctype": dtype_to_ctype(self.index_dtype),
            "index_type_max": np.iinfo(self.index_dtype).max,
            "padded_bin": _padded_bin,
            "scan_ctype": scan_ctype,
            "sort_arg_names": sort_arg_names,
            "get_count_branch": get_count_branch,
        }

        preamble = scan_t_cdecl + RADIX_SORT_PREAMBLE_TPL.render(
            **codegen_args)
        scan_preamble = preamble + RADIX_SORT_SCAN_PREAMBLE_TPL.render(
            **codegen_args)
        output_statement = RADIX_SORT_OUTPUT_STMT_TPL.render(**codegen_args)

        from pyopencl.scan import GenericScanKernel
        self.scan_kernel = GenericScanKernel(
            context,
            scan_dtype,
            arguments=scan_arguments,
            input_expr="scan_t_from_value(%s, base_bit, i)" % key_expr,
            scan_expr="scan_t_add(a, b, across_seg_boundary)",
            neutral="scan_t_neutral()",
            output_statement=output_statement,
            preamble=scan_preamble,
            options=self.options)

        # NOTE: there is no ``break`` here, so when several array arguments
        # are present the index of the *last* one is what gets stored. The
        # attribute stays unset if there is no array argument at all.
        for position, parsed_arg in enumerate(self.arguments):
            if isinstance(parsed_arg, VectorArg):
                self.first_array_arg_idx = position
示例#2
0
def get_elwise_kernel_and_types(context, arguments, operation,
                                name="elwise_kernel", options=[],
                                preamble="", use_range=False, **kwargs):
    """Build an elementwise kernel and return it with its argument list.

    *arguments* is parsed with ``parse_arg_list(..., with_offset=True)``.
    Unless the keyword ``auto_preamble`` is passed as false, the preamble
    is prefixed with the double-precision pragma block when any argument
    has dtype ``float64`` or ``complex128``, and with the
    ``pyopencl-complex.h`` include when any argument has a complex dtype.

    The parsed argument list is then extended with the scalar ``intp``
    arguments ``start``, ``stop``, ``step`` (if *use_range*) or ``n``
    (otherwise). The keyword ``loop_prep`` (default ``""``) is appended to
    the code returned by ``get_arg_offset_adjuster_code``; all remaining
    keyword arguments go to ``get_elwise_program`` unchanged.

    Returns a tuple ``(kernel, parsed_args)``; the kernel has had its
    scalar argument dtypes set.
    """
    from pyopencl.tools import (
        get_arg_list_scalar_arg_dtypes,
        get_arg_offset_adjuster_code,
        parse_arg_list,
    )

    parsed_args = parse_arg_list(arguments, with_offset=True)

    auto_preamble = kwargs.pop("auto_preamble", True)

    # First find out what the argument dtypes require...
    needs_fp64 = False
    needs_complex = False
    if auto_preamble:
        for parsed_arg in parsed_args:
            if parsed_arg.dtype in [np.float64, np.complex128]:
                needs_fp64 = True
            if parsed_arg.dtype.kind == 'c':
                needs_complex = True

    # ...then emit each header chunk at most once, pragmas before includes.
    header_chunks = []
    if needs_fp64:
        header_chunks.append("""
                        #if __OPENCL_C_VERSION__ < 120
                        #pragma OPENCL EXTENSION cl_khr_fp64: enable
                        #endif
                        #define PYOPENCL_DEFINE_CDOUBLE
                        """)
    if needs_complex:
        header_chunks.append("#include <pyopencl-complex.h>\n")

    if header_chunks:
        preamble = "\n".join(header_chunks) + "\n" + preamble

    # Trailing size/range arguments.
    size_arg_names = ["start", "stop", "step"] if use_range else ["n"]
    parsed_args.extend(
        [ScalarArg(np.intp, size_name) for size_name in size_arg_names])

    user_loop_prep = kwargs.pop("loop_prep", "")
    loop_prep = get_arg_offset_adjuster_code(parsed_args) + user_loop_prep

    prg = get_elwise_program(
        context, parsed_args, operation,
        name=name, options=options, preamble=preamble,
        use_range=use_range, loop_prep=loop_prep, **kwargs)

    kernel = getattr(prg, name)
    kernel.set_scalar_arg_dtypes(get_arg_list_scalar_arg_dtypes(parsed_args))

    return kernel, parsed_args
示例#3
0
def get_elwise_kernel_and_types(context,
                                arguments,
                                operation,
                                name="elwise_kernel",
                                options=[],
                                preamble="",
                                use_range=False,
                                **kwargs):
    """Build an elementwise kernel and return it with its argument list.

    *arguments* is either a string of comma-separated C argument
    declarations (each piece is parsed with ``parse_c_arg``) or an iterable
    of already-parsed argument objects. In the latter case the iterable is
    copied, so the size/range arguments added below never end up in the
    caller's own list.

    Unless the keyword ``auto_preamble`` is passed as false, the preamble
    is prefixed with the ``cl_khr_fp64`` pragma when any argument has dtype
    ``float64`` or ``complex128``, and with the ``pyopencl-complex.h``
    include when any argument has a complex dtype.

    The argument list is extended with the scalar ``intp`` arguments
    ``start``, ``stop``, ``step`` (if *use_range*) or ``n`` (otherwise).
    Remaining keyword arguments go to ``get_elwise_program`` unchanged.

    Returns a tuple ``(kernel, parsed_args)``; the kernel has had its
    scalar argument dtypes set.
    """
    if isinstance(arguments, str):
        from pyopencl.tools import parse_c_arg
        parsed_args = [parse_c_arg(arg) for arg in arguments.split(",")]
    else:
        # Copy: the list is extended below and must not alias the caller's
        # object, otherwise reusing one argument list for several kernels
        # would accumulate extra trailing scalar arguments.
        parsed_args = list(arguments)

    auto_preamble = kwargs.pop("auto_preamble", True)

    pragmas = []
    includes = []
    have_double_pragma = False
    have_complex_include = False

    if auto_preamble:
        for arg in parsed_args:
            if arg.dtype in [np.float64, np.complex128]:
                if not have_double_pragma:
                    pragmas.append(
                        "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
                        "#define PYOPENCL_DEFINE_CDOUBLE\n")
                    have_double_pragma = True
            if arg.dtype.kind == 'c':
                if not have_complex_include:
                    includes.append("#include <pyopencl-complex.h>\n")
                    have_complex_include = True

    if pragmas or includes:
        preamble = "\n".join(pragmas + includes) + "\n" + preamble

    if use_range:
        parsed_args.extend([
            ScalarArg(np.intp, "start"),
            ScalarArg(np.intp, "stop"),
            ScalarArg(np.intp, "step"),
        ])
    else:
        parsed_args.append(ScalarArg(np.intp, "n"))

    prg = get_elwise_program(context,
                             parsed_args,
                             operation,
                             name=name,
                             options=options,
                             preamble=preamble,
                             use_range=use_range,
                             **kwargs)

    # One entry per kernel argument: the dtype for scalars, None otherwise.
    scalar_arg_dtypes = [
        arg.dtype if isinstance(arg, ScalarArg) else None
        for arg in parsed_args
    ]

    kernel = getattr(prg, name)
    kernel.set_scalar_arg_dtypes(scalar_arg_dtypes)

    return kernel, parsed_args
示例#4
0
文件: beam.py 项目: bellaz89/pyFEL
    def initialize(cls):
        '''
            Compile the OpenCL program and create the two radix sorters.

            Sets cls.program, cls.longitudinal_sort_kernel and
            cls.longitudinal_traverse_sort_kernel.
        '''
        cls.program = cl.Program(cl_ctx, F(cls.KERNEL)).build()

        # Names of the arrays that both sorters reorder.
        sorted_names = ("x", "px", "y", "py", "theta", "gamma")

        def particle_args():
            '''
                Fresh VectorArg declarations for the sorted arrays
            '''
            return [VectorArg(cl_ftype, arg_name) for arg_name in sorted_names]

        cls.longitudinal_sort_kernel = RadixSort(
            cl_ctx,
            particle_args() + [ScalarArg(cl_ftype, "inv_slice_len")],
            key_expr="(int) floor(theta[i]*inv_slice_len)",
            sort_arg_names=list(sorted_names),
            key_dtype=np.int32)

        class LongitudinalTraverseScanKernel(GenericScanKernel):
            '''
                Scan kernel whose preamble additionally defines the
                sort_fun key function used by the longitudinal traverse sort
            '''
            def __init__(self, *argl, **argd):
                '''
                    Prepend sort_fun to argd['preamble'] (which must be
                    present) and defer to GenericScanKernel
                '''

                sort_fun_src = '''
                            int sort_fun(FLOAT_TYPE x, 
                                         FLOAT_TYPE y, 
                                         FLOAT_TYPE theta, 
                                         FLOAT_TYPE inv_slice_len, 
                                         FLOAT_TYPE inv_traverse_len,
                                         int bins) {
                                         
                                         FLOAT_TYPE xnorm = 0.5 + (inv_traverse_len*x);
                                         FLOAT_TYPE ynorm = 0.5 + (inv_traverse_len*y);
                                         int xbin = (int) floor(xnorm * inv_traverse_len);
                                         int ybin = (int) floor(ynorm * inv_traverse_len);
                                         int zbin = (int) floor(theta*inv_slice_len);

                                         if ((xbin < 0) || (xbin >= bins) || (ybin < 0) || (ybin >= bins)) {
                                            xbin = 0;
                                            ybin = 0;

                                         }

                                         return xbin+bins*(ybin+bins*zbin);
                            }
                           '''

                # Work on a copy so the caller's keyword dict stays untouched.
                patched_argd = dict(argd)
                patched_argd.update(
                    preamble=F(sort_fun_src + patched_argd['preamble']))
                super().__init__(*argl, **patched_argd)

        traverse_args = particle_args() + [
            ScalarArg(cl_ftype, "inv_slice_len"),
            ScalarArg(cl_ftype, "inv_traverse_len"),
            ScalarArg(np.int32, "bins"),
        ]
        cls.longitudinal_traverse_sort_kernel = RadixSort(
            cl_ctx,
            traverse_args,
            key_expr="sort_fun(x[i],y[i],theta[i], inv_slice_len, inv_traverse_len, bins)",
            sort_arg_names=list(sorted_names),
            scan_kernel=LongitudinalTraverseScanKernel,
            key_dtype=np.int32)
示例#5
0
    def get_kernel_info(self, dimensions, particle_id_dtype, box_id_dtype,
            coord_dtype, box_level_dtype, max_levels,
            sources_are_targets, sources_have_extent, targets_have_extent,
            stick_out_factor):
        """Build all traversal kernels/builders and bundle them.

        The dtype and flag parameters are made available to the code
        templates as rendering variables; *box_level_dtype* is only used as
        the ``box_level_t`` type alias of the level-start extractor.

        The returned ``_KernelInfo`` is constructed from these keywords:
        ``sources_parents_and_targets_builder``,
        ``level_start_box_nrs_extractor`` and one ``<list>_builder`` for
        each of ``colleagues``, ``neighbor_source_boxes``, ``sep_siblings``,
        ``sep_smaller`` and ``sep_bigger``. The last two builders also
        produce a ``sep_close_*`` list when sources or targets have extent.
        A separate ``target_boxes`` list is built only when sources are not
        targets.
        """

        logging.info("building traversal build kernels")

        debug = False

        from pyopencl.tools import dtype_to_ctype
        from boxtree.tree import box_flags_enum
        from pyopencl.algorithm import ListOfListsBuilder
        from pyopencl.tools import VectorArg, ScalarArg

        render_vars = {
            "dimensions": dimensions,
            "dtype_to_ctype": dtype_to_ctype,
            "particle_id_dtype": particle_id_dtype,
            "box_id_dtype": box_id_dtype,
            "box_flags_enum": box_flags_enum,
            "coord_dtype": coord_dtype,
            "vec_types": cl.array.vec.types,
            "max_levels": max_levels,
            "AXIS_NAMES": AXIS_NAMES,
            "debug": debug,
            "sources_are_targets": sources_are_targets,
            "sources_have_extent": sources_have_extent,
            "targets_have_extent": targets_have_extent,
            "stick_out_factor": stick_out_factor,
        }

        def render(template_text):
            # Render one template with the shared variables, as a str.
            rendered = Template(
                template_text, strict_undefined=True).render(**render_vars)
            return str(rendered)

        def box_id_vec(arg_name):
            # Array argument holding box ids.
            return VectorArg(box_id_dtype, arg_name)

        result = {}

        # {{{ source boxes, their parents, target boxes

        spt_lists = [
            ("source_parent_boxes", box_id_dtype),
            ("source_boxes", box_id_dtype),
            ("target_or_target_parent_boxes", box_id_dtype),
        ]
        if not sources_are_targets:
            spt_lists.append(("target_boxes", box_id_dtype))

        result["sources_parents_and_targets_builder"] = ListOfListsBuilder(
            self.context,
            spt_lists,
            render(TRAVERSAL_PREAMBLE_TEMPLATE
                   + SOURCES_PARENTS_AND_TARGETS_TEMPLATE),
            arg_decls=[VectorArg(box_flags_enum.dtype, "box_flags")],
            debug=debug,
            name_prefix="sources_parents_and_targets")

        result["level_start_box_nrs_extractor"] = \
            LEVEL_START_BOX_NR_EXTRACTOR_TEMPLATE.build(
                self.context,
                type_aliases=(
                    ("box_id_t", box_id_dtype),
                    ("box_level_t", box_level_dtype),
                ),
            )

        # }}}

        # {{{ build list N builders

        # Arguments shared by every list builder below. Note that
        # "box_levels" is declared as uint8 regardless of box_level_dtype.
        base_args = [
            VectorArg(coord_dtype, "box_centers"),
            ScalarArg(coord_dtype, "root_extent"),
            VectorArg(np.uint8, "box_levels"),
            ScalarArg(box_id_dtype, "aligned_nboxes"),
            box_id_vec("box_child_ids"),
            VectorArg(box_flags_enum.dtype, "box_flags"),
        ]

        # The "close" companion lists exist only if something has extent.
        with_close_lists = sources_have_extent or targets_have_extent

        # (list name, template, extra kernel arguments, extra output lists)
        list_specs = [
            ("colleagues", COLLEAGUES_TEMPLATE, [], []),
            ("neighbor_source_boxes", NEIGBHOR_SOURCE_BOXES_TEMPLATE,
                [box_id_vec("target_boxes")],
                []),
            ("sep_siblings", SEP_SIBLINGS_TEMPLATE,
                [
                    box_id_vec("target_or_target_parent_boxes"),
                    box_id_vec("box_parent_ids"),
                    box_id_vec("colleagues_starts"),
                    box_id_vec("colleagues_list"),
                ],
                []),
            ("sep_smaller", SEP_SMALLER_TEMPLATE,
                [
                    box_id_vec("target_boxes"),
                    box_id_vec("colleagues_starts"),
                    box_id_vec("colleagues_list"),
                ],
                ["sep_close_smaller"] if with_close_lists else []),
            ("sep_bigger", SEP_BIGGER_TEMPLATE,
                [
                    box_id_vec("target_or_target_parent_boxes"),
                    box_id_vec("box_parent_ids"),
                    box_id_vec("colleagues_starts"),
                    box_id_vec("colleagues_list"),
                ],
                ["sep_close_bigger"] if with_close_lists else []),
        ]

        for list_name, template, extra_args, extra_lists in list_specs:
            output_lists = [
                (output_name, box_id_dtype)
                for output_name in [list_name] + extra_lists
            ]
            kernel_source = render(
                TRAVERSAL_PREAMBLE_TEMPLATE
                + HELPER_FUNCTION_TEMPLATE
                + template)

            result[list_name + "_builder"] = ListOfListsBuilder(
                self.context,
                output_lists,
                kernel_source,
                arg_decls=base_args + extra_args,
                debug=debug,
                name_prefix=list_name,
                complex_kernel=True)

        # }}}

        logging.info("traversal build kernels built")

        return _KernelInfo(**result)