Example 1
def generate_definition(kernel_info, target='cpu'):
    """Generates the definition (i.e. implementation) of the kernel function"""
    ast = kernel_info.ast
    result = generate_c(ast, dialect='cuda' if target == 'gpu' else 'c')
    result = "namespace internal_%s {\nstatic %s\n}" % (ast.function_name,
                                                        result)
    return result
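A minimal usage sketch follows. It assumes ``kernel_info`` is any object exposing the kernel AST via an ``ast`` attribute (here a hypothetical namedtuple stand-in) and builds the kernel itself with the standard pystencils API.

from collections import namedtuple

import pystencils as ps

# Hypothetical stand-in: generate_definition only needs the ``ast`` attribute.
KernelInfo = namedtuple('KernelInfo', ['ast'])

src, dst = ps.fields("src, dst: double[2D]")
update = ps.Assignment(dst[0, 0],
                       (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4)
kernel_info = KernelInfo(ast=ps.create_kernel(update))

print(generate_definition(kernel_info, target='cpu'))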
Example 2
    def write_to_file(self, restrict_qualifier, function_prefix, file):
        headers = {'<math.h>', '<stdint.h>'}
        for ast in self._ast_nodes:
            headers.update(get_headers(ast))
        header_list = list(headers)
        header_list.sort()
        header_list.insert(0, '"Python.h"')

        includes = "\n".join(
            ["#include %s" % (include_file, ) for include_file in header_list])
        print(includes, file=file)
        print("\n", file=file)
        print("#define RESTRICT %s" % (restrict_qualifier, ), file=file)
        print("#define FUNC_PREFIX %s" % (function_prefix, ), file=file)
        print("\n", file=file)

        for ast, name in zip(self._ast_nodes, self._function_names):
            old_name = ast.function_name
            ast.function_name = "kernel_" + name
            print(generate_c(ast, custom_backend=self._custom_backend),
                  file=file)
            print(create_function_boilerplate_code(ast.get_parameters(), name),
                  file=file)
            ast.function_name = old_name
        print(create_module_boilerplate_code(self.module_name,
                                             self._function_names),
              file=file)
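A hedged driver sketch, assuming the write_to_file method above is available as a plain function and belongs to an extension-module builder. ``ExtensionModule`` below is a hypothetical minimal stand-in that only provides the attributes the method relies on; in pystencils itself this role is played by the CPU JIT's extension-module class.

import io

import pystencils as ps


class ExtensionModule:
    """Hypothetical minimal container for the write_to_file method shown above."""

    def __init__(self, module_name):
        self.module_name = module_name
        self._ast_nodes = []
        self._function_names = []
        self._custom_backend = None

    def add_function(self, ast, name):
        self._ast_nodes.append(ast)
        self._function_names.append(name)


ExtensionModule.write_to_file = write_to_file  # attach the method from the example above

src, dst = ps.fields("src, dst: double[2D]")
module = ExtensionModule("my_module")
module.add_function(ps.create_kernel(ps.Assignment(dst[0, 0], 2 * src[0, 0])),
                    "double_field")

buffer = io.StringIO()
module.write_to_file("__restrict__", "", buffer)
print(buffer.getvalue())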
Example 3
def expr_to_dict(expr_or_node: NodeOrExpr,
                 with_c_code=True,
                 full_class_names=False):
    """Converts a SymPy expression to a serializable dict (mainly for debugging purposes)

    The dict recursively contains all args of the expression as ``dict``s

    See :func:`.write_json`

    Args:
        expr_or_node (NodeOrExpr): a SymPy expression or a :class:`pystencils.astnodes.Node`
        with_c_code (bool, optional): include C representation of the nodes
        full_class_names (bool, optional): use full class names (``type(object)`` instead of ``type(object).__name__``)
    """

    self = {'str': str(expr_or_node)}
    if with_c_code:
        try:
            self.update({'c': generate_c(expr_or_node)})
        except Exception:
            try:
                self.update({'c': CustomSympyPrinter().doprint(expr_or_node)})
            except Exception:
                pass
    for a in expr_or_node.args:
        self.update({
            str(a.__class__ if full_class_names else a.__class__.__name__):
            expr_to_dict(a, with_c_code, full_class_names)
        })

    return self
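A small usage sketch on a plain SymPy expression; with ``with_c_code=False`` the function needs nothing beyond SymPy, and the resulting dict can be dumped directly as JSON.

import json

import sympy as sp

x, y = sp.symbols("x y")
tree = expr_to_dict(x * y + 2, with_c_code=False)
print(json.dumps(tree, indent=2))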
Example 4
def generate_declaration(kernel_info, target='cpu'):
    """Generates the declaration of the kernel function"""
    ast = kernel_info.ast
    result = generate_c(ast,
                        signature_only=True,
                        dialect='cuda' if target == 'gpu' else 'c') + ";"
    result = "namespace internal_%s {\n%s\n}" % (
        ast.function_name,
        result,
    )
    return result
Example 5
def generate_opencl(astnode: Node, signature_only: bool = False) -> str:
    """Prints an abstract syntax tree node (made for target 'gpu') as OpenCL code.

    Args:
        astnode: KernelFunction node to generate code for
        signature_only: if True, only the signature is printed

    Returns:
        C-like code for the ast node and its descendants
    """
    return generate_c(astnode, signature_only, dialect='opencl')
Example 6
def generate_cuda(astnode: Node, signature_only: bool = False) -> str:
    """Prints an abstract syntax tree node as CUDA code.

    Args:
        astnode: KernelFunction node to generate code for
        signature_only: if True, only the signature is printed

    Returns:
        C-like code for the ast node and its descendants
    """
    return generate_c(astnode, signature_only, dialect='cuda')
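A hedged sketch combining the two printers above, assuming the older string-based ``target``/``dialect`` API used throughout these examples.

import pystencils as ps

src, dst = ps.fields("src, dst: double[2D]")
update = ps.Assignment(dst[0, 0], 2 * src[0, 0])
gpu_ast = ps.create_kernel(update, target='gpu')

print(generate_cuda(gpu_ast))                          # full CUDA kernel
print(generate_opencl(gpu_ast, signature_only=True))   # OpenCL signature only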
Example 7
def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
    """Return C code of a benchmark program for the given kernel.

    Args:
        ast: the pystencils AST object as returned by create_kernel
        likwid: if True, likwid markers are added to the code
        openmp: relevant only if likwid=True, to generate correct likwid initialization code
        timing: add timing output to the code, prints time per iteration to stdout

    Returns:
        C code as string
    """
    accessed_fields = {f.name: f for f in ast.fields_accessed}
    constants = []
    fields = []
    call_parameters = []
    for p in ast.get_parameters():
        if not p.is_field_parameter:
            constants.append((p.symbol.name, str(p.symbol.dtype)))
            call_parameters.append(p.symbol.name)
        else:
            assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
            field = accessed_fields[p.field_name]
            dtype = str(get_base_type(p.symbol.dtype))
            fields.append((p.field_name, dtype, prod(field.shape)))
            call_parameters.append(p.field_name)

    header_list = get_headers(ast)
    includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

    # Strip "#pragma omp parallel" from within kernel, because main function takes care of that
    # when likwid and openmp are enabled
    if likwid and openmp:
        if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
            ast.body.args[0].pragma_line = ''

    args = {
        'likwid': likwid,
        'openmp': openmp,
        'kernel_code': generate_c(ast, dialect='c'),
        'kernelName': ast.function_name,
        'fields': fields,
        'constants': constants,
        'call_argument_list': ",".join(call_parameters),
        'includes': includes,
        'timing': timing,
    }
    return benchmark_template.render(**args)
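A hedged sketch of producing a standalone benchmark source. Fixed-size fields are used because, as asserted above, the benchmark only supports kernels with a fixed loop size; the compiler invocation in the comment is only illustrative.

import pystencils as ps

src, dst = ps.fields("src, dst: double[1024, 1024]")
ast = ps.create_kernel(ps.Assignment(dst[0, 0], 2 * src[0, 0]))

benchmark_source = generate_benchmark(ast, likwid=False, timing=True)
with open("benchmark.c", "w") as f:
    f.write(benchmark_source)
# e.g.: gcc -O3 -march=native -fopenmp benchmark.c -o benchmark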
Example 8
def generate_cuda(ast_node: Node,
                  signature_only: bool = False,
                  custom_backend=None,
                  with_globals=True) -> str:
    """Prints an abstract syntax tree node as CUDA code.

    Args:
        ast_node: ast representation of kernel
        signature_only: generate signature without function body
        custom_backend: use own custom printer for code generation
        with_globals: enable usage of global variables

    Returns:
        CUDA code for the ast node and its descendants
    """
    return generate_c(ast_node,
                      signature_only,
                      dialect=Backend.CUDA,
                      custom_backend=custom_backend,
                      with_globals=with_globals)
Example 9
    def create_code_string(self, restrict_qualifier, function_prefix):
        self._code_string = str()

        headers = {'<math.h>', '<stdint.h>'}
        for ast in self._ast_nodes:
            headers.update(get_headers(ast))
        header_list = list(headers)
        header_list.sort()
        header_list.insert(0, '"Python.h"')
        ps_headers = [
            os.path.join(os.path.dirname(__file__), '..', 'include', h[1:-1])
            for h in header_list if os.path.exists(
                os.path.join(os.path.dirname(__file__), '..', 'include',
                             h[1:-1]))
        ]
        header_hash = b''.join([
            hashlib.sha256(open(h, 'rb').read()).digest() for h in ps_headers
        ])

        includes = "\n".join(
            [f"#include {include_file}" for include_file in header_list])
        self._code_string += includes
        self._code_string += "\n"
        self._code_string += f"#define RESTRICT {restrict_qualifier} \n"
        self._code_string += f"#define FUNC_PREFIX {function_prefix}"
        self._code_string += "\n"

        for ast, name in zip(self._ast_nodes, self._function_names):
            old_name = ast.function_name
            ast.function_name = f"kernel_{name}"
            self._code_string += generate_c(
                ast, custom_backend=self._custom_backend)
            self._code_string += create_function_boilerplate_code(
                ast.get_parameters(), name, ast)
            ast.function_name = old_name

        self._code_hash = "mod_" + hashlib.sha256(self._code_string.encode() +
                                                  header_hash).hexdigest()
        self._code_string += create_module_boilerplate_code(
            self._code_hash, self._function_names)
Example 10
def compile_and_load(ast, custom_backend=None):
    cache_config = get_cache_config()
    code_hash_str = "mod_" + hashlib.sha256(
        generate_c(ast, dialect='c',
                   custom_backend=custom_backend).encode()).hexdigest()
    code = ExtensionModuleCode(module_name=code_hash_str,
                               custom_backend=custom_backend)
    code.add_function(ast, ast.function_name)

    if cache_config['object_cache'] is False:
        with TemporaryDirectory() as base_dir:
            lib_file = compile_module(code, code_hash_str, base_dir)
            result = load_kernel_from_file(code_hash_str, ast.function_name,
                                           lib_file)
    else:
        lib_file = compile_module(code,
                                  code_hash_str,
                                  base_dir=cache_config['object_cache'])
        result = load_kernel_from_file(code_hash_str, ast.function_name,
                                       lib_file)

    return KernelWrapper(result, ast.get_parameters(), ast)
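A hedged end-to-end sketch: create fields directly from NumPy arrays, compile the kernel to a shared library, and call the returned wrapper with the arrays as keyword arguments.

import numpy as np
import pystencils as ps

src_arr = np.random.rand(32, 32)
dst_arr = np.zeros_like(src_arr)

src, dst = ps.fields(src=src_arr, dst=dst_arr)
ast = ps.create_kernel(ps.Assignment(dst[0, 0], 2 * src[0, 0]))

kernel = compile_and_load(ast)
kernel(src=src_arr, dst=dst_arr)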
Example 11
    def __repr__(self):
        # ``dialect`` and ``custom_backend`` are free variables here; they are
        # expected to be defined in the enclosing scope (e.g. a show_code helper).
        return generate_c(self.ast,
                          dialect=dialect,
                          custom_backend=custom_backend)
Example 12
    def _repr_html_(self):
        # As above, ``dialect`` and ``custom_backend`` come from the enclosing scope.
        return highlight_cpp(
            generate_c(self.ast,
                       dialect=dialect,
                       custom_backend=custom_backend)).__html__()
Example 13
def make_python_function(kernel_function_node,
                         argument_dict=None,
                         custom_backend=None):
    """
    Creates a kernel function from an abstract syntax tree which
    was created e.g. by :func:`pystencils.gpucuda.create_cuda_kernel`
    or :func:`pystencils.gpucuda.created_indexed_cuda_kernel`

    Args:
        kernel_function_node: the abstract syntax tree
        argument_dict: parameters passed here are already fixed. Remaining parameters have to be passed to the
                       returned kernel functor.

    Returns:
        compiled kernel as Python function
    """
    import pycuda.autoinit  # NOQA
    from pycuda.compiler import SourceModule

    if argument_dict is None:
        argument_dict = {}

    header_list = ['<stdint.h>'] + list(get_headers(kernel_function_node))
    includes = "\n".join(
        ["#include %s" % (include_file, ) for include_file in header_list])

    code = includes + "\n"
    code += "#define FUNC_PREFIX __global__\n"
    code += "#define RESTRICT __restrict__\n\n"
    code += str(
        generate_c(kernel_function_node,
                   dialect='cuda',
                   custom_backend=custom_backend))
    options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"]
    if USE_FAST_MATH:
        options.append("-use_fast_math")
    mod = SourceModule(code,
                       options=options,
                       include_dirs=[get_pystencils_include_path()])
    func = mod.get_function(kernel_function_node.function_name)

    parameters = kernel_function_node.get_parameters()

    cache = {}
    cache_values = []

    def wrapper(**kwargs):
        key = hash(
            tuple((k, v.ctypes.data, v.strides,
                   v.shape) if isinstance(v, np.ndarray) else (k, id(v))
                  for k, v in kwargs.items()))
        try:
            args, block_and_thread_numbers = cache[key]
            func(*args, **block_and_thread_numbers)
        except KeyError:
            full_arguments = argument_dict.copy()
            full_arguments.update(kwargs)
            shape = _check_arguments(parameters, full_arguments)

            indexing = kernel_function_node.indexing
            block_and_thread_numbers = indexing.call_parameters(shape)
            block_and_thread_numbers['block'] = tuple(
                int(i) for i in block_and_thread_numbers['block'])
            block_and_thread_numbers['grid'] = tuple(
                int(i) for i in block_and_thread_numbers['grid'])

            args = _build_numpy_argument_list(parameters, full_arguments)
            cache[key] = (args, block_and_thread_numbers)
            cache_values.append(
                kwargs)  # keep objects alive such that ids remain unique
            func(*args, **block_and_thread_numbers)
        # import pycuda.driver as cuda
        # cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called

    wrapper.ast = kernel_function_node
    wrapper.parameters = kernel_function_node.get_parameters()
    wrapper.num_regs = func.num_regs
    return wrapper
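A hedged sketch (requires pycuda and a CUDA-capable GPU): field arguments are passed as pycuda GPU arrays, remaining scalars as plain values.

import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.gpuarray as gpuarray
import pystencils as ps

src, dst = ps.fields("src, dst: double[2D]")
ast = ps.create_kernel(ps.Assignment(dst[0, 0], 2 * src[0, 0]), target='gpu')
kernel = make_python_function(ast)

src_gpu = gpuarray.to_gpu(np.random.rand(32, 32))
dst_gpu = gpuarray.zeros((32, 32), np.float64)
kernel(src=src_gpu, dst=dst_gpu)
print(dst_gpu.get())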
Example 14
def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
    """
    Creates an **OpenCL** kernel function from an abstract syntax tree which
    was created for ``target='gpu'``, e.g. by :func:`pystencils.gpucuda.create_cuda_kernel`
    or :func:`pystencils.gpucuda.created_indexed_cuda_kernel`

    Args:
        opencl_queue: a valid :class:`pyopencl.CommandQueue`
        opencl_ctx: a valid :class:`pyopencl.Context`
        kernel_function_node: the abstract syntax tree
        argument_dict: parameters passed here are already fixed. Remaining parameters have to be passed to the
                       returned kernel functor.

    Returns:
        compiled kernel as Python function
    """
    import pyopencl as cl
    assert opencl_ctx, "No valid OpenCL context"
    assert opencl_queue, "No valid OpenCL queue"

    if argument_dict is None:
        argument_dict = {}

    # Rename the kernel: compiling under the default name "kernel" is not possible, since "kernel" is an OpenCL keyword
    kernel_function_node.function_name = "opencl_" + kernel_function_node.function_name
    header_list = ['"opencl_stdint.h"'] + list(get_headers(kernel_function_node))
    includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

    code = includes + "\n"
    code += "#define FUNC_PREFIX __kernel\n"
    code += "#define RESTRICT restrict\n\n"
    code += str(generate_c(kernel_function_node, dialect='opencl', custom_backend=custom_backend))
    options = []
    if USE_FAST_MATH:
        options.append("-cl-unsafe-math-optimizations -cl-mad-enable -cl-fast-relaxed-math -cl-finite-math-only")
    options.append("-I \"" + get_pystencils_include_path() + "\"")
    mod = cl.Program(opencl_ctx, code).build(options=options)
    func = getattr(mod, kernel_function_node.function_name)

    parameters = kernel_function_node.get_parameters()

    cache = {}
    cache_values = []

    def wrapper(**kwargs):
        key = hash(tuple((k, v.ctypes.data, v.strides, v.shape) if isinstance(v, np.ndarray) else (k, id(v))
                         for k, v in kwargs.items()))
        try:
            args, block_and_thread_numbers = cache[key]
            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)
        except KeyError:
            full_arguments = argument_dict.copy()
            full_arguments.update(kwargs)
            shape = _check_arguments(parameters, full_arguments)

            indexing = kernel_function_node.indexing
            block_and_thread_numbers = indexing.call_parameters(shape)
            block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block'])
            block_and_thread_numbers['grid'] = tuple(int(b * g) for (b, g) in zip(block_and_thread_numbers['block'],
                                                                                  block_and_thread_numbers['grid']))

            args = _build_numpy_argument_list(parameters, full_arguments)
            args = [a.data if hasattr(a, 'data') else a for a in args]
            cache[key] = (args, block_and_thread_numbers)
            cache_values.append(kwargs)  # keep objects alive such that ids remain unique
            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)

    wrapper.ast = kernel_function_node
    wrapper.parameters = kernel_function_node.get_parameters()
    return wrapper
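A hedged sketch (requires pyopencl): a context and queue are created with pyopencl, and field arguments are passed as pyopencl arrays whose ``.data`` buffers are extracted by the wrapper above.

import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array
import pystencils as ps

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

src, dst = ps.fields("src, dst: double[2D]")
ast = ps.create_kernel(ps.Assignment(dst[0, 0], 2 * src[0, 0]), target='gpu')
kernel = make_python_function(ast, opencl_queue=queue, opencl_ctx=ctx)

src_dev = cl_array.to_device(queue, np.random.rand(32, 32))
dst_dev = cl_array.zeros(queue, (32, 32), np.float64)
kernel(src=src_dev, dst=dst_dev)
print(dst_dev.get())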