def find_neighbors_for_partition(partition_cids, partition_size,
                                 partition_wgs, q=None):
    """Launch the 'find_neighbors' kernel for one partition of cells.

    NOTE(review): this function reads many names that are not parameters
    (``self``, ``tree_src``, ``pa_gpu_src``, ``pa_gpu_dst``, ``dtype``,
    ``wgs``, ``neighbor_cid_count``, ``neighbor_cids``, ``start_indices``,
    ``neighbors``, ``get_queue``) -- presumably it is a closure defined
    inside a method; confirm against the enclosing scope.

    Parameters
    ----------
    partition_cids : device array of cell ids making up this partition.
    partition_size : int, number of cells in the partition.
    partition_wgs : int, work-group size used for the launch dimensions.
    q : optional queue; when None the default queue from get_queue() is used.
    """
    # NOTE(review): the kernel is built with 'wgs' from the enclosing
    # scope while the launch uses 'partition_wgs' -- looks intentional
    # (kernel specialization vs. launch geometry) but TODO confirm.
    find_neighbors = self.helper.get_kernel('find_neighbors',
                                            sorted=self.sorted, wgs=wgs)
    find_neighbors(
        partition_cids.dev, tree_src.pids.dev, self.pids.dev,
        self.cids.dev,
        tree_src.pbounds.dev, self.pbounds.dev,
        # Source particle coordinates and smoothing lengths.
        pa_gpu_src.x.dev, pa_gpu_src.y.dev, pa_gpu_src.z.dev,
        pa_gpu_src.h.dev,
        # Destination particle coordinates and smoothing lengths.
        pa_gpu_dst.x.dev, pa_gpu_dst.y.dev, pa_gpu_dst.z.dev,
        pa_gpu_dst.h.dev,
        dtype(self.radius_scale),
        neighbor_cid_count.dev, neighbor_cids.dev,
        start_indices.dev, neighbors.dev,
        # Global size is one work-group per cell of the partition.
        gs=(partition_wgs * partition_size, ), ls=(partition_wgs, ),
        queue=(get_queue() if q is None else q)
    )
def sort_by_keys(ary_list, out_list=None, key_bits=None, backend=None):
    """Sort a list of arrays using the first array as the sort key.

    Parameters
    ----------
    ary_list : list of Array; ``ary_list[0]`` is the key, the remaining
        arrays are permuted into the same order.
    out_list : optional list of output arrays (only used by the
        non-opencl branch; the opencl branch rebinds it).
    key_bits : number of significant key bits, forwarded to the OpenCL
        sort kernel.
    backend : backend name; defaults to the key array's backend.

    Returns
    -------
    list of arrays sorted by the key.
    """
    # first arg of ary_list is the key
    if backend is None:
        backend = ary_list[0].backend
    if backend == 'opencl':
        from .jit import get_ctype_from_arg
        from compyle.opencl import get_queue
        arg_types = [get_ctype_from_arg(arg) for arg in ary_list]
        sort_knl = get_cl_sort_kernel(arg_types, ary_list)
        allocator = get_allocator(get_queue())
        arg_list = [ary.dev for ary in ary_list]
        # NOTE(review): the caller-supplied 'out_list' is ignored on this
        # branch -- the kernel's freshly allocated outputs replace it.
        out_list, event = sort_knl(*arg_list, key_bits=key_bits,
                                   allocator=allocator)
        return out_list
    else:
        # Generic path: compute the sort permutation of the key, then
        # align the remaining arrays; the key itself is returned as-is.
        order = argsort(ary_list[0], backend=backend)
        out_list = align(ary_list[1:], order, out_list=out_list,
                         backend=backend)
        return [ary_list[0]] + out_list
def __init__(self, acceleration_eval):
    """Initialize helper state for GPU code generation.

    Builds the known-type table from the particle arrays and the
    pre-computed symbols of ``acceleration_eval``, and creates the GPU
    context/queue for the chosen backend.

    Parameters
    ----------
    acceleration_eval : object providing ``backend``, ``particle_arrays``
        and ``all_group.pre_comp`` (presumably an AccelerationEval --
        confirm against the caller).
    """
    self.object = acceleration_eval
    self.backend = acceleration_eval.backend
    # Names of all arrays across the particle arrays, used to derive
    # the known types for code generation.
    self.all_array_names = get_all_array_names(
        self.object.particle_arrays
    )
    self.known_types = get_known_types_for_arrays(
        self.all_array_names
    )
    add_address_space(self.known_types)
    # Merge in types of pre-computed quantities from the groups.
    predefined = dict(get_predefined_types(
        self.object.all_group.pre_comp
    ))
    self.known_types.update(predefined)
    # Neighbor-list argument passed to every kernel.
    self.known_types['NBRS'] = KnownType('GLOBAL_MEM unsigned int*')
    self.data = []
    self._ctx = get_context(self.backend)
    self._queue = get_queue(self.backend)
    # Lazily populated lookup structures.
    self._array_map = None
    self._array_index = None
    self._equations = {}
    # Host- and device-side struct caches.
    self._cpu_structs = {}
    self._gpu_structs = {}
    self.calls = []
    # Compiled program; built later.
    self.program = None
def linspace(start, stop, num, dtype=np.float64, backend='opencl',
             endpoint=True):
    """Return ``num`` evenly spaced samples over ``[start, stop]``.

    Mirrors ``numpy.linspace`` for the 'opencl' and 'cuda' backends by
    scaling a device-side ``arange``; falls back to ``numpy.linspace``
    otherwise.

    Parameters
    ----------
    start, stop : scalars delimiting the interval.
    num : int
        Number of samples; must be positive.
    dtype : dtype of the generated samples.
    backend : 'opencl', 'cuda', or anything else for the numpy path.
    endpoint : bool
        When True, ``stop`` is the last sample.

    Returns
    -------
    The wrapped backend array of samples.

    Raises
    ------
    TypeError if ``num`` is not an integer, ValueError if it is not
    positive.
    """
    # isinstance instead of `type(num) == int` so numpy integer types
    # are accepted as well.
    if not isinstance(num, (int, np.integer)):
        raise TypeError("num should be an integer but got %s" % type(num))
    if num <= 0:
        raise ValueError("Number of samples, %s, must be positive." % num)
    # Step between consecutive samples, shared by both GPU branches.
    # num == 1 is guarded explicitly: the endpoint formula would divide
    # by zero (numpy.linspace handles this case itself on the CPU path,
    # and with a single sample the step is never applied anyway).
    if num == 1:
        delta = 0
    elif endpoint:
        delta = (stop - start) / (num - 1)
    else:
        delta = (stop - start) / num
    if backend == 'opencl':
        import pyopencl.array as gpuarray
        from .opencl import get_queue
        out = gpuarray.arange(get_queue(), 0, num, 1, dtype=dtype)
        out = out * delta + start
    elif backend == 'cuda':
        import pycuda.gpuarray as gpuarray
        import pycuda.autoinit
        out = gpuarray.arange(0, num, 1, dtype=dtype)
        out = out * delta + start
    else:
        out = np.linspace(start, stop, num, endpoint=endpoint, dtype=dtype)
    return wrap_array(out, backend)
def get_queue(backend):
    """Return the execution-queue object for the requested GPU backend.

    For 'cuda' a placeholder ``DummyQueue`` instance is returned; for
    'opencl' the queue comes from ``compyle.opencl.get_queue``.  Any
    other backend name raises ``RuntimeError``.
    """
    if backend == 'opencl':
        from compyle.opencl import get_queue
        return get_queue()
    if backend == 'cuda':
        return DummyQueue()
    raise RuntimeError('Unsupported GPU backend %s' % backend)
def sort_by_keys(ary_list, out_list=None, key_bits=None, backend=None,
                 use_radix_sort=False):
    """Sort a list of arrays using the first array as the sort key.

    Parameters
    ----------
    ary_list : list of Array; ``ary_list[0]`` is the key.
    out_list : optional list of pre-allocated output arrays.
    key_bits : number of significant key bits (OpenCL sort / radix sort).
    backend : backend name; defaults to the key array's backend.
    use_radix_sort : bool
        On the 'cython' backend, use the radix sort implementation
        instead of ``numpy.argsort`` + align.

    Returns
    -------
    list of sorted arrays (key included, except that the generic branch
    returns the key unchanged -- see the final return).
    """
    # FIXME: Need to use returned values, cuda backend uses
    # thrust that will internally allocate a new array for storing
    # the sorted data so out_list will not have the sorted arrays
    # first arg of ary_list is the key
    if backend is None:
        backend = ary_list[0].backend
    if backend == 'opencl':
        from .jit import get_ctype_from_arg
        from compyle.opencl import get_queue
        # Allocate empty wrappers so the kernel outputs can be attached
        # to stable Array objects returned to the caller.
        if not out_list:
            out_list = [
                Array(ary.dtype, allocate=False, backend=backend)
                for ary in ary_list
            ]
        arg_types = [
            get_ctype_from_arg(arg, backend=backend) for arg in ary_list
        ]
        sort_knl = get_cl_sort_kernel(arg_types, ary_list)
        allocator = get_allocator(get_queue())
        arg_list = [ary.dev for ary in ary_list]
        out_arrays, event = sort_knl(*arg_list, key_bits=key_bits,
                                     allocator=allocator)
        # Attach the freshly sorted device buffers to the output wrappers.
        for i, out in enumerate(out_list):
            out.set_data(out_arrays[i])
        return out_list
    elif backend == 'cython' and use_radix_sort:
        out_list, order = radix_sort(ary_list, out_list=out_list,
                                     max_key_bits=key_bits,
                                     backend=backend)
        return out_list
    elif backend == 'cython':
        # Sort permutation computed on the host via numpy.
        order = wrap(np.argsort(ary_list[0].dev), backend=backend)
        out_list = align(ary_list, order, out_list=out_list,
                         backend=backend)
        return out_list
    else:
        # Generic (e.g. cuda) path: permute everything except the key;
        # the key array itself is returned as-is (see FIXME above).
        order = argsort(ary_list[0], backend=backend)
        modified_out_list = None
        if out_list:
            modified_out_list = out_list[1:]
        out_list = align(ary_list[1:], order, out_list=modified_out_list,
                         backend=backend)
        return [ary_list[0]] + out_list
def arange(start, stop, step, dtype=np.int32, backend='cython'):
    """Backend-aware equivalent of ``numpy.arange``.

    Builds the range on the device for the 'opencl' and 'cuda' backends
    and with numpy otherwise, then wraps the result in an Array.
    """
    if backend == 'cuda':
        import pycuda.gpuarray as gpuarray
        ary = gpuarray.arange(start, stop, step, dtype=dtype)
    elif backend == 'opencl':
        from .opencl import get_queue
        import pyopencl.array as gpuarray
        ary = gpuarray.arange(get_queue(), start, stop, step, dtype=dtype)
    else:
        ary = np.arange(start, stop, step, dtype=dtype)
    return wrap_array(ary, backend)
def zeros(n, dtype, backend='cython'):
    """Return a zero-filled Array of length ``n`` on the given backend.

    Allocates on the device for 'opencl'/'cuda' and with numpy otherwise.
    """
    if backend == 'cuda':
        import pycuda.gpuarray as gpuarray
        ary = gpuarray.zeros(n, dtype)
    elif backend == 'opencl':
        from .opencl import get_queue
        import pyopencl.array as gpuarray
        ary = gpuarray.zeros(get_queue(), n, dtype)
    else:
        ary = np.zeros(n, dtype=dtype)
    return wrap_array(ary, backend)
def to_device(array, backend='cython'):
    """Copy a host array to the given backend's device and wrap it.

    Parameters
    ----------
    array : host (numpy) array to transfer; returned as-is for the
        'cython' backend.
    backend : 'cython', 'opencl' or 'cuda'.

    Returns
    -------
    The wrapped (possibly device-resident) array.

    Raises
    ------
    RuntimeError for an unknown backend name.
    """
    if backend == 'cython':
        out = array
    elif backend == 'opencl':
        import pyopencl.array as gpuarray
        from .opencl import get_queue
        out = gpuarray.to_device(get_queue(), array)
    elif backend == 'cuda':
        import pycuda.gpuarray as gpuarray
        out = gpuarray.to_gpu(array)
    else:
        # Previously an unknown backend fell through and crashed with an
        # UnboundLocalError on 'out'; fail with a clear message instead
        # (same error style as get_queue()).
        raise RuntimeError('Unsupported backend %s' % backend)
    return wrap_array(out, backend)
def __init__(self, n, k=8, leaf_size=32):
    """Initialize tree construction state.

    Parameters
    ----------
    n : int
        Number of elements the tree will index.
    k : int
        Branching factor of the tree -- presumably children per node;
        TODO confirm against the mako template.
    leaf_size : int
        Maximum number of elements held in a leaf.
    """
    self.ctx = get_context()
    self.queue = get_queue()
    # Whether the underlying data has been sorted into tree order.
    # (Fix: this flag was previously assigned twice in this method.)
    self.sorted = False
    # Template with the tree-construction kernels.
    self.main_helper = get_helper(os.path.join('tree', 'tree.mako'))
    self.initialized = False
    self.preamble = ""
    self.leaf_size = leaf_size
    self.k = k
    self.n = n
    self.depth = 0
    # Metadata describing the user-supplied index function; populated
    # by set_index_function_info() below.
    self.index_function_args = []
    self.index_function_arg_ctypes = []
    self.index_function_arg_dtypes = []
    self.index_function_consts = []
    self.index_function_const_ctypes = []
    self.index_code = ""
    self.set_index_function_info()
def wrap_array(arr, backend):
    """Wrap an existing array (numpy, pyopencl or pycuda) in an Array.

    For numpy input on a GPU backend, floating point data is first cast
    to the globally configured precision and then transferred to the
    device.  Already-on-device arrays are attached directly.

    Parameters
    ----------
    arr : numpy.ndarray, pyopencl Array, or pycuda GPUArray.
    backend : 'cython', 'opencl' or 'cuda'.
    """
    wrapped_array = Array(arr.dtype, allocate=False, backend=backend)
    if isinstance(arr, np.ndarray):
        wrapped_array.data = arr
        if backend == 'opencl' or backend == 'cuda':
            # Cast floats to the precision selected by the global config
            # (float32 unless use_double is set).
            use_double = get_config().use_double
            _dtype = np.float64 if use_double else np.float32
            if np.issubdtype(arr.dtype, np.floating):
                wrapped_array.dtype = _dtype
                wrapped_array.data = arr.astype(_dtype)
        q = None
        if backend == 'opencl':
            from .opencl import get_queue
            from pyopencl.array import to_device
            q = get_queue()
            # NOTE(review): 'arr is not None' is always true inside this
            # isinstance(arr, np.ndarray) branch; kept as-is.
            if arr is not None:
                dev_ary = to_device(q, wrapped_array.data)
                wrapped_array.set_data(dev_ary)
        elif backend == 'cuda':
            # Ensure a CUDA context exists before any transfer.
            from .cuda import set_context
            set_context()
            from pycuda.gpuarray import to_gpu
            if arr is not None:
                dev_ary = to_gpu(wrapped_array.data)
                wrapped_array.set_data(dev_ary)
        else:
            # Pure-host backend: the numpy array is the data.
            wrapped_array.set_data(wrapped_array.data)
    elif backend == 'opencl':
        import pyopencl.array as gpuarray
        # Already a device array: attach without copying.
        if isinstance(arr, gpuarray.Array):
            wrapped_array.set_data(arr)
    elif backend == 'cuda':
        import pycuda.gpuarray as gpuarray
        if isinstance(arr, gpuarray.GPUArray):
            wrapped_array.set_data(arr)
    return wrapped_array