Example #1
    def compile(self,
                extra_source_files=[],
                extra_cuda_flags=[],
                with_cuda=None,
                build_dir=None,
                compile_module_name=None):
        from torch.utils.cpp_extension import load
        file_extension = '.cu' if self.is_cuda else '.cpp'
        source_code = str(self)
        hash = _hash(source_code.encode()).hexdigest()
        if not build_dir:
            build_dir = join(get_cache_config()['object_cache'], self.module_name)
        os.makedirs(build_dir, exist_ok=True)
        file_name = join(build_dir, f'{hash}{file_extension}')

        self.compiled_file = join(build_dir, compile_module_name or file_name).replace('.cpp', '') + '.so'

        if not exists(file_name):
            write_file(file_name, source_code)

        # Torch picks up the C++ compiler from the CXX environment variable
        os.environ['CXX'] = get_compiler_config()['command']

        torch_extension = load(compile_module_name or hash,
                               [file_name] + extra_source_files,
                               with_cuda=self.is_cuda or with_cuda,
                               extra_cflags=['--std=c++14',
                                             get_compiler_config()['flags'].replace('--std=c++11', '')],
                               extra_cuda_cflags=['-std=c++14', '-ccbin',
                                                  get_compiler_config()['command']] + extra_cuda_flags,
                               build_directory=build_dir,
                               extra_include_paths=[get_pycuda_include_path(),
                                                    get_pystencils_include_path(),
                                                    *get_cubic_interpolation_include_paths()])
        return torch_extension
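
For reference, the underlying API here is torch.utils.cpp_extension.load, which JIT-compiles and imports a C++/CUDA extension. A minimal, self-contained sketch, with a hypothetical source file my_op.cpp that defines its own PYBIND11_MODULE:

from torch.utils.cpp_extension import load

# Hypothetical example: my_op.cpp must define a PYBIND11_MODULE named 'my_ext'.
my_ext = load(name='my_ext',
              sources=['my_op.cpp'],
              extra_cflags=['-std=c++14'],
              verbose=True)
# Functions exported by the module are then available as my_ext.<name>(...).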
Example #2
def compile_module(code, code_hash, base_dir):
    compiler_config = get_compiler_config()
    extra_flags = [
        '-I' + get_paths()['include'], '-I' + get_pystencils_include_path()
    ]

    if compiler_config['os'].lower() == 'windows':
        function_prefix = '__declspec(dllexport)'
        lib_suffix = '.pyd'
        object_suffix = '.obj'
        windows = True
    else:
        function_prefix = ''
        lib_suffix = '.so'
        object_suffix = '.o'
        windows = False

    src_file = os.path.join(base_dir, code_hash + ".cpp")
    lib_file = os.path.join(base_dir, code_hash + lib_suffix)
    object_file = os.path.join(base_dir, code_hash + object_suffix)

    if not os.path.exists(object_file):
        with file_handle_for_atomic_write(src_file) as f:
            code.write_to_file(compiler_config['restrict_qualifier'],
                               function_prefix, f)

        if windows:
            compile_cmd = ['cl.exe', '/c', '/EHsc'
                           ] + compiler_config['flags'].split()
            compile_cmd += [*extra_flags, src_file, '/Fo' + object_file]
            run_compile_step(compile_cmd)
        else:
            with atomic_file_write(object_file) as file_name:
                compile_cmd = [compiler_config['command'], '-c'
                               ] + compiler_config['flags'].split()
                compile_cmd += [*extra_flags, '-o', file_name, src_file]
                run_compile_step(compile_cmd)

        # Linking
        if windows:
            import sysconfig
            config_vars = sysconfig.get_config_vars()
            py_lib = os.path.join(
                config_vars["installed_base"], "libs",
                "python{}.lib".format(config_vars["py_version_nodot"]))
            run_compile_step(
                ['link.exe', py_lib, '/DLL', '/out:' + lib_file, object_file])
        elif platform.system().lower() == 'darwin':
            with atomic_file_write(lib_file) as file_name:
                run_compile_step([
                    compiler_config['command'], '-shared', object_file, '-o',
                    file_name, '-undefined', 'dynamic_lookup'
                ] + compiler_config['flags'].split())
        else:
            with atomic_file_write(lib_file) as file_name:
                run_compile_step([
                    compiler_config['command'], '-shared', object_file, '-o',
                    file_name
                ] + compiler_config['flags'].split())
    return lib_file
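
The returned lib_file is a compiled extension library (on Windows it is linked against the Python import library). A hedged sketch of one way such a file could be imported by path with only the standard library; the path and module name below are hypothetical:

import importlib.util
from importlib.machinery import ExtensionFileLoader

# Hypothetical values standing in for code_hash and the returned lib_file.
module_name = '1a2b3c'
lib_path = '/tmp/object_cache/1a2b3c.so'

loader = ExtensionFileLoader(module_name, lib_path)
spec = importlib.util.spec_from_file_location(module_name, lib_path, loader=loader)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
# Functions exported by the extension are now attributes of `module`.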
Example #3
    def compile(self):
        try:
            import cppimport
        except ImportError:
            try:
                from torch.utils.cpp_extension import load
            except Exception:
                assert False, 'cppimport or torch is required for compiling pybind11 modules'

        assert not self.is_cuda

        source_code = self.CPP_IMPORT_PREFIX + str(self)
        hash_str = _hash(source_code.encode()).hexdigest()
        source_code_with_hash = source_code.replace(
            f'PYBIND11_MODULE({self.module_name}',
            f'PYBIND11_MODULE(cppimport_{hash_str}')

        cache_dir = join(get_cache_config()['object_cache'])
        file_name = join(cache_dir, f'cppimport_{hash_str}.cpp')

        os.makedirs(cache_dir, exist_ok=True)
        if not exists(file_name):
            write_file(file_name, source_code_with_hash)
        # TODO: propagate extra headers
        if cache_dir not in sys.path:
            sys.path.append(cache_dir)

        # Torch picks up the C++ compiler from the CXX environment variable
        os.environ['CXX'] = get_compiler_config()['command']

        try:
            torch_extension = cppimport.imp(f'cppimport_{hash_str}')
        except Exception as e:
            print(e)
            torch_extension = load(f'cppimport_{hash_str}',
                                   [file_name],
                                   with_cuda=self.is_cuda,
                                   extra_cflags=['--std=c++14',
                                                 get_compiler_config()['flags'].replace('--std=c++11', '')],
                                   extra_cuda_cflags=['-std=c++14', '-ccbin', get_compiler_config()['command']],
                                   build_directory=cache_dir,
                                   extra_include_paths=[get_pycuda_include_path(),
                                                        get_pystencils_include_path()])
        return torch_extension
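
For reference, cppimport's import path works on any .cpp file reachable through sys.path that carries cppimport's configuration comment (presumably what CPP_IMPORT_PREFIX supplies above). A minimal hypothetical sketch:

import cppimport

# Hypothetical module: my_module.cpp lies on sys.path, starts with cppimport's
# configuration comment and defines PYBIND11_MODULE(my_module, m).
my_module = cppimport.imp('my_module')   # builds the extension if necessary
# my_module.<exported function>(...) now calls into the compiled code.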
Example #4
def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
    """Runs the given kernel with outer loop in C

    Args:
        ast:
        inner_iterations: timings are recorded around this many iterations
        outer_iterations: number of timings recorded

    Returns:
        list of times per iterations for each outer iteration
    """
    import kerncraft

    benchmark_code = generate_benchmark(ast, timing=True)
    with open('bench.c', 'w') as f:
        f.write(benchmark_code)

    kerncraft_path = os.path.dirname(kerncraft.__file__)

    extra_flags = ['-I' + get_pystencils_include_path(),
                   '-I' + os.path.join(kerncraft_path, 'headers')]

    compiler_config = get_compiler_config()
    compile_cmd = [compiler_config['command']] + compiler_config['flags'].split()
    compile_cmd += [*extra_flags,
                    os.path.join(kerncraft_path, 'headers', 'timing.c'),
                    os.path.join(kerncraft_path, 'headers', 'dummy.c'),
                    'bench.c',
                    '-o', 'bench',
                    ]
    run_compile_step(compile_cmd)

    results = []
    for _ in range(outer_iterations):
        benchmark_time = float(subprocess.check_output(['./bench', str(inner_iterations)]))
        results.append(benchmark_time)
    return results
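
The list returned by run_c_benchmark holds one timing per outer run; summarizing it needs nothing beyond the standard library. A small sketch with made-up numbers:

import statistics

# times = run_c_benchmark(kernel_ast, inner_iterations=100)  # hypothetical call
times = [1.31e-4, 1.27e-4, 1.29e-4]   # made-up example values
print(f'best: {min(times):.3e} s   mean: {statistics.mean(times):.3e} s')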
Example #5
def make_python_function(kernel_function_node,
                         argument_dict=None,
                         custom_backend=None):
    """
    Creates a kernel function from an abstract syntax tree which
    was created e.g. by :func:`pystencils.gpucuda.create_cuda_kernel`
    or :func:`pystencils.gpucuda.created_indexed_cuda_kernel`

    Args:
        kernel_function_node: the abstract syntax tree
        argument_dict: parameters passed here are already fixed. Remaining parameters have to be passed to the
                       returned kernel functor.

    Returns:
        compiled kernel as Python function
    """
    import pycuda.autoinit  # NOQA
    from pycuda.compiler import SourceModule

    if argument_dict is None:
        argument_dict = {}

    header_list = ['<stdint.h>'] + list(get_headers(kernel_function_node))
    includes = "\n".join(
        ["#include %s" % (include_file, ) for include_file in header_list])

    code = includes + "\n"
    code += "#define FUNC_PREFIX __global__\n"
    code += "#define RESTRICT __restrict__\n\n"
    code += str(
        generate_c(kernel_function_node,
                   dialect='cuda',
                   custom_backend=custom_backend))
    options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"]
    if USE_FAST_MATH:
        options.append("-use_fast_math")
    mod = SourceModule(code,
                       options=options,
                       include_dirs=[get_pystencils_include_path()])
    func = mod.get_function(kernel_function_node.function_name)

    parameters = kernel_function_node.get_parameters()

    cache = {}
    cache_values = []

    def wrapper(**kwargs):
        key = hash(
            tuple((k, v.ctypes.data, v.strides,
                   v.shape) if isinstance(v, np.ndarray) else (k, id(v))
                  for k, v in kwargs.items()))
        try:
            args, block_and_thread_numbers = cache[key]
            func(*args, **block_and_thread_numbers)
        except KeyError:
            full_arguments = argument_dict.copy()
            full_arguments.update(kwargs)
            shape = _check_arguments(parameters, full_arguments)

            indexing = kernel_function_node.indexing
            block_and_thread_numbers = indexing.call_parameters(shape)
            block_and_thread_numbers['block'] = tuple(
                int(i) for i in block_and_thread_numbers['block'])
            block_and_thread_numbers['grid'] = tuple(
                int(i) for i in block_and_thread_numbers['grid'])

            args = _build_numpy_argument_list(parameters, full_arguments)
            cache[key] = (args, block_and_thread_numbers)
            cache_values.append(
                kwargs)  # keep objects alive such that ids remain unique
            func(*args, **block_and_thread_numbers)
        # import pycuda.driver as cuda
        # cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called

    wrapper.ast = kernel_function_node
    wrapper.parameters = kernel_function_node.get_parameters()
    wrapper.num_regs = func.num_regs
    return wrapper
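
The wrapper above hides the raw PyCUDA launch protocol; for orientation, a minimal stand-alone PyCUDA kernel launch (independent of pystencils) looks like this:

import numpy as np
import pycuda.autoinit          # noqa: F401, creates a default CUDA context
import pycuda.driver as cuda
from pycuda.compiler import SourceModule

mod = SourceModule("""
__global__ void scale(float *x) { x[threadIdx.x] *= 2.0f; }
""")
scale = mod.get_function('scale')

data = np.arange(32, dtype=np.float32)
scale(cuda.InOut(data), block=(32, 1, 1), grid=(1, 1))   # explicit launch config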
Example #6
def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
    """
    Creates an **OpenCL** kernel function from an abstract syntax tree which
    was created for the ``target='gpu'`` e.g. by :func:`pystencils.gpucuda.create_cuda_kernel`
    or :func:`pystencils.gpucuda.created_indexed_cuda_kernel`

    Args:
        opencl_queue: a valid :class:`pyopencl.CommandQueue`
        opencl_ctx: a valid :class:`pyopencl.Context`
        kernel_function_node: the abstract syntax tree
        argument_dict: parameters passed here are already fixed. Remaining parameters have to be passed to the
                       returned kernel functor.

    Returns:
        compiled kernel as Python function
    """
    import pyopencl as cl
    assert opencl_ctx, "No valid OpenCL context"
    assert opencl_queue, "No valid OpenCL queue"

    if argument_dict is None:
        argument_dict = {}

    # The kernel has to be renamed: the default function name "kernel" is an OpenCL keyword and will not compile
    kernel_function_node.function_name = "opencl_" + kernel_function_node.function_name
    header_list = ['"opencl_stdint.h"'] + list(get_headers(kernel_function_node))
    includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

    code = includes + "\n"
    code += "#define FUNC_PREFIX __kernel\n"
    code += "#define RESTRICT restrict\n\n"
    code += str(generate_c(kernel_function_node, dialect='opencl', custom_backend=custom_backend))
    options = []
    if USE_FAST_MATH:
        options.append("-cl-unsafe-math-optimizations -cl-mad-enable -cl-fast-relaxed-math -cl-finite-math-only")
    options.append("-I \"" + get_pystencils_include_path() + "\"")
    mod = cl.Program(opencl_ctx, code).build(options=options)
    func = getattr(mod, kernel_function_node.function_name)

    parameters = kernel_function_node.get_parameters()

    cache = {}
    cache_values = []

    def wrapper(**kwargs):
        key = hash(tuple((k, v.ctypes.data, v.strides, v.shape) if isinstance(v, np.ndarray) else (k, id(v))
                         for k, v in kwargs.items()))
        try:
            args, block_and_thread_numbers = cache[key]
            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)
        except KeyError:
            full_arguments = argument_dict.copy()
            full_arguments.update(kwargs)
            shape = _check_arguments(parameters, full_arguments)

            indexing = kernel_function_node.indexing
            block_and_thread_numbers = indexing.call_parameters(shape)
            block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block'])
            block_and_thread_numbers['grid'] = tuple(int(b * g) for (b, g) in zip(block_and_thread_numbers['block'],
                                                                                  block_and_thread_numbers['grid']))

            args = _build_numpy_argument_list(parameters, full_arguments)
            args = [a.data if hasattr(a, 'data') else a for a in args]
            cache[key] = (args, block_and_thread_numbers)
            cache_values.append(kwargs)  # keep objects alive such that ids remain unique
            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)

    wrapper.ast = kernel_function_node
    wrapper.parameters = kernel_function_node.get_parameters()
    return wrapper
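
Analogously, the bare PyOpenCL pattern the wrapper builds on (independent of pystencils) is:

import numpy as np
import pyopencl as cl

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

program = cl.Program(ctx, """
__kernel void scale(__global float *x) { x[get_global_id(0)] *= 2.0f; }
""").build()

data = np.arange(32, dtype=np.float32)
buf = cl.Buffer(ctx, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
program.scale(queue, data.shape, None, buf)   # global size, local size, kernel args
cl.enqueue_copy(queue, data, buf)             # read the result back to the host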
Example #7
def compile_module(code, code_hash, base_dir, compile_flags=None):
    if compile_flags is None:
        compile_flags = []

    compiler_config = get_compiler_config()
    extra_flags = [
        '-I' + get_paths()['include'], '-I' + get_pystencils_include_path()
    ] + compile_flags

    if compiler_config['os'].lower() == 'windows':
        lib_suffix = '.pyd'
        object_suffix = '.obj'
        windows = True
    else:
        lib_suffix = '.so'
        object_suffix = '.o'
        windows = False

    src_file = os.path.join(base_dir, code_hash + ".cpp")
    lib_file = os.path.join(base_dir, code_hash + lib_suffix)
    object_file = os.path.join(base_dir, code_hash + object_suffix)

    if not os.path.exists(object_file):
        try:
            with open(src_file, 'x') as f:
                code.write_to_file(f)
        except FileExistsError:
            pass

        if windows:
            compile_cmd = ['cl.exe', '/c', '/EHsc'
                           ] + compiler_config['flags'].split()
            compile_cmd += [*extra_flags, src_file, '/Fo' + object_file]
            run_compile_step(compile_cmd)
        else:
            with atomic_file_write(object_file) as file_name:
                compile_cmd = [compiler_config['command'], '-c'
                               ] + compiler_config['flags'].split()
                compile_cmd += [*extra_flags, '-o', file_name, src_file]
                run_compile_step(compile_cmd)

        # Linking
        if windows:
            import sysconfig
            config_vars = sysconfig.get_config_vars()
            py_lib = os.path.join(
                config_vars["installed_base"], "libs",
                f"python{config_vars['py_version_nodot']}.lib")
            run_compile_step(
                ['link.exe', py_lib, '/DLL', '/out:' + lib_file, object_file])
        elif platform.system().lower() == 'darwin':
            with atomic_file_write(lib_file) as file_name:
                run_compile_step([
                    compiler_config['command'], '-shared', object_file, '-o',
                    file_name, '-undefined', 'dynamic_lookup'
                ] + compiler_config['flags'].split())
        else:
            with atomic_file_write(lib_file) as file_name:
                run_compile_step([
                    compiler_config['command'], '-shared', object_file, '-o',
                    file_name
                ] + compiler_config['flags'].split())
    return lib_file
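
The atomic_file_write helper used in both compile_module variants is not shown here. A hedged sketch of how such a context manager could be implemented, assuming only the semantics suggested by its use above (write the compiler output to a temporary file, then atomically rename it into place):

import os
import tempfile
from contextlib import contextmanager

@contextmanager
def atomic_file_write(target_path):
    # Hypothetical implementation: create a temp file next to the target so the
    # final rename stays on the same filesystem and is therefore atomic.
    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(target_path) or '.')
    os.close(fd)                              # the caller writes to tmp_path itself
    try:
        yield tmp_path                        # e.g. passed to the compiler via '-o'
        os.replace(tmp_path, target_path)     # atomic move into place
    except Exception:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)               # never leave half-written artifacts
        raise

With this pattern, concurrent builds of the same cache entry never observe a partially written object or library file.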