Пример #1
0
 def find_neighbors_for_partition(partition_cids,
                                  partition_size,
                                  partition_wgs,
                                  q=None):
     """Launch the 'find_neighbors' kernel over one partition of cells.

     Defined as a closure: it reads ``self``, ``tree_src``, ``pa_gpu_src``,
     ``pa_gpu_dst``, ``dtype``, ``wgs`` and the neighbor output buffers
     from the enclosing scope.

     Parameters:
         partition_cids: device array of cell ids belonging to this
             partition.
         partition_size: number of cells in the partition (used to size
             the global work range).
         partition_wgs: work-group size to launch with.
         q: optional OpenCL command queue; falls back to ``get_queue()``.
     """
     # NOTE(review): the kernel is compiled with the enclosing `wgs`,
     # not `partition_wgs` — confirm that is intentional.
     find_neighbors = self.helper.get_kernel('find_neighbors',
                                             sorted=self.sorted,
                                             wgs=wgs)
     # Arguments: cell/particle index maps for source and destination
     # trees, particle coordinates + smoothing lengths for both particle
     # arrays, the radius scale, then the neighbor output buffers.
     find_neighbors(partition_cids.dev,
                    tree_src.pids.dev,
                    self.pids.dev,
                    self.cids.dev,
                    tree_src.pbounds.dev,
                    self.pbounds.dev,
                    pa_gpu_src.x.dev,
                    pa_gpu_src.y.dev,
                    pa_gpu_src.z.dev,
                    pa_gpu_src.h.dev,
                    pa_gpu_dst.x.dev,
                    pa_gpu_dst.y.dev,
                    pa_gpu_dst.z.dev,
                    pa_gpu_dst.h.dev,
                    dtype(self.radius_scale),
                    neighbor_cid_count.dev,
                    neighbor_cids.dev,
                    start_indices.dev,
                    neighbors.dev,
                    # One work-group per cell in the partition.
                    gs=(partition_wgs * partition_size, ),
                    ls=(partition_wgs, ),
                    queue=(get_queue() if q is None else q))
Пример #2
0
def sort_by_keys(ary_list, out_list=None, key_bits=None, backend=None):
    """Sort a list of arrays using the first array as the sort key.

    Parameters:
        ary_list: list of arrays; ``ary_list[0]`` holds the keys and the
            remaining arrays are reordered to match the sorted key order.
        out_list: optional list of output arrays (same layout as
            ``ary_list``).
        key_bits: number of significant key bits, forwarded to the
            OpenCL radix sort kernel.
        backend: backend name; defaults to the key array's backend.

    Returns:
        The sorted arrays, key first.
    """
    # first arg of ary_list is the key
    if backend is None:
        backend = ary_list[0].backend
    if backend == 'opencl':
        from .jit import get_ctype_from_arg
        from compyle.opencl import get_queue

        arg_types = [get_ctype_from_arg(arg) for arg in ary_list]

        sort_knl = get_cl_sort_kernel(arg_types, ary_list)
        allocator = get_allocator(get_queue())

        arg_list = [ary.dev for ary in ary_list]

        # NOTE: on this path any caller-supplied out_list is ignored;
        # the radix-sort kernel allocates and returns its own outputs.
        out_list, event = sort_knl(*arg_list,
                                   key_bits=key_bits,
                                   allocator=allocator)
        return out_list
    else:
        order = argsort(ary_list[0], backend=backend)
        # align() only reorders the value arrays (ary_list[1:]), so pass
        # the matching slice of out_list — previously the full out_list
        # (including the key slot) was passed, misaligning the outputs.
        value_out_list = out_list[1:] if out_list else None
        out_list = align(ary_list[1:],
                         order,
                         out_list=value_out_list,
                         backend=backend)
        return [ary_list[0]] + out_list
Пример #3
0
 def __init__(self, acceleration_eval):
     """Prepare type information and GPU context/queue for generating
     code for the given ``acceleration_eval`` object.
     """
     self.object = acceleration_eval
     self.backend = acceleration_eval.backend
     # Names of all particle-array properties, used to build type info.
     self.all_array_names = get_all_array_names(
         self.object.particle_arrays
     )
     self.known_types = get_known_types_for_arrays(
         self.all_array_names
     )
     # Mark array types with the GPU address space qualifier.
     add_address_space(self.known_types)
     # Merge in types of precomputed quantities from the equation groups.
     predefined = dict(get_predefined_types(
         self.object.all_group.pre_comp
     ))
     self.known_types.update(predefined)
     # Neighbor index list passed to every equation kernel.
     self.known_types['NBRS'] = KnownType('GLOBAL_MEM unsigned int*')
     self.data = []
     self._ctx = get_context(self.backend)
     self._queue = get_queue(self.backend)
     # Lazily-built lookup tables; populated during code generation.
     self._array_map = None
     self._array_index = None
     self._equations = {}
     # Host/device struct caches keyed per equation/kernel.
     self._cpu_structs = {}
     self._gpu_structs = {}
     self.calls = []
     self.program = None
Пример #4
0
def linspace(start,
             stop,
             num,
             dtype=np.float64,
             backend='opencl',
             endpoint=True):
    """Return ``num`` evenly spaced samples from ``start`` to ``stop``
    on the requested backend (mirrors ``numpy.linspace``).

    Parameters:
        start, stop: interval bounds; ``stop`` is included only when
            ``endpoint`` is True.
        num: number of samples (must be a positive int).
        dtype: output dtype.
        backend: 'opencl', 'cuda', or anything else for plain numpy.
        endpoint: include ``stop`` as the last sample.

    Returns:
        A wrapped array on the chosen backend.

    Raises:
        TypeError: if ``num`` is not an integer.
        ValueError: if ``num`` is not positive.
    """
    if not isinstance(num, int):
        raise TypeError("num should be an integer but got %s" % type(num))
    if num <= 0:
        raise ValueError("Number of samples, %s, must be positive." % num)

    def _delta():
        # Spacing between consecutive samples.  With a single sample the
        # endpoint formula would divide by zero; dividing by num instead
        # yields delta that is never used (0 * delta + start == start),
        # matching numpy's behavior of returning [start].
        if endpoint and num > 1:
            return (stop - start) / (num - 1)
        return (stop - start) / num

    if backend == 'opencl':
        import pyopencl.array as gpuarray
        from .opencl import get_queue
        out = gpuarray.arange(get_queue(), 0, num, 1, dtype=dtype)
        out = out * _delta() + start
    elif backend == 'cuda':
        import pycuda.gpuarray as gpuarray
        import pycuda.autoinit
        out = gpuarray.arange(0, num, 1, dtype=dtype)
        out = out * _delta() + start
    else:
        out = np.linspace(start, stop, num, endpoint=endpoint, dtype=dtype)
    return wrap_array(out, backend)
Пример #5
0
def get_queue(backend):
    """Return a command-queue object for the given GPU backend.

    'cuda' gets a DummyQueue placeholder (PyCUDA needs no explicit
    queue); 'opencl' returns compyle's shared queue.  Anything else
    raises RuntimeError.
    """
    if backend == 'opencl':
        from compyle.opencl import get_queue
        return get_queue()
    if backend == 'cuda':
        return DummyQueue()
    raise RuntimeError('Unsupported GPU backend %s' % backend)
Пример #6
0
def sort_by_keys(ary_list,
                 out_list=None,
                 key_bits=None,
                 backend=None,
                 use_radix_sort=False):
    """Sort a list of arrays using the first array as the sort key.

    Parameters:
        ary_list: list of arrays; ``ary_list[0]`` holds the keys and the
            remaining arrays are reordered to match the sorted key order.
        out_list: optional list of output arrays with the same layout as
            ``ary_list``.
        key_bits: number of significant key bits, forwarded to the radix
            sorters.
        backend: backend name ('opencl', 'cython', ...); defaults to the
            key array's backend.
        use_radix_sort: on the 'cython' backend, use radix_sort instead
            of argsort-based alignment.

    Returns:
        The sorted arrays, key first.
    """
    # FIXME: Need to use returned values, cuda backend uses
    # thrust that will internally allocate a new array for storing
    # the sorted data so out_list will not have the sorted arrays
    # first arg of ary_list is the key
    if backend is None:
        backend = ary_list[0].backend
    if backend == 'opencl':
        from .jit import get_ctype_from_arg
        from compyle.opencl import get_queue

        # Create empty output wrappers if the caller did not supply any.
        if not out_list:
            out_list = [
                Array(ary.dtype, allocate=False, backend=backend)
                for ary in ary_list
            ]

        arg_types = [
            get_ctype_from_arg(arg, backend=backend) for arg in ary_list
        ]

        sort_knl = get_cl_sort_kernel(arg_types, ary_list)
        allocator = get_allocator(get_queue())

        arg_list = [ary.dev for ary in ary_list]

        out_arrays, event = sort_knl(*arg_list,
                                     key_bits=key_bits,
                                     allocator=allocator)
        # The kernel allocates its own device arrays; hand them to the
        # output wrappers.
        for i, out in enumerate(out_list):
            out.set_data(out_arrays[i])
        return out_list
    elif backend == 'cython' and use_radix_sort:
        out_list, order = radix_sort(ary_list,
                                     out_list=out_list,
                                     max_key_bits=key_bits,
                                     backend=backend)
        return out_list
    elif backend == 'cython':
        order = wrap(np.argsort(ary_list[0].dev), backend=backend)
        out_list = align(ary_list, order, out_list=out_list, backend=backend)
        return out_list
    else:
        order = argsort(ary_list[0], backend=backend)
        # align() only reorders the value arrays (ary_list[1:]), so drop
        # the key slot from any caller-supplied out_list.
        modified_out_list = None
        if out_list:
            modified_out_list = out_list[1:]
        out_list = align(ary_list[1:],
                         order,
                         out_list=modified_out_list,
                         backend=backend)
        return [ary_list[0]] + out_list
Пример #7
0
def arange(start, stop, step, dtype=np.int32, backend='cython'):
    """Backend-aware ``arange``: evenly stepped values in [start, stop).

    Builds the sequence with pyopencl/pycuda on the GPU backends and
    numpy otherwise, then wraps the result for the chosen backend.
    """
    if backend == 'cuda':
        import pycuda.gpuarray as gpuarray
        result = gpuarray.arange(start, stop, step, dtype=dtype)
    elif backend == 'opencl':
        from .opencl import get_queue
        import pyopencl.array as gpuarray
        result = gpuarray.arange(get_queue(), start, stop, step, dtype=dtype)
    else:
        result = np.arange(start, stop, step, dtype=dtype)
    return wrap_array(result, backend)
Пример #8
0
def zeros(n, dtype, backend='cython'):
    """Allocate an ``n``-element zero-filled array on the given backend."""
    if backend == 'cuda':
        import pycuda.gpuarray as gpuarray
        data = gpuarray.zeros(n, dtype)
    elif backend == 'opencl':
        from .opencl import get_queue
        import pyopencl.array as gpuarray
        data = gpuarray.zeros(get_queue(), n, dtype)
    else:
        data = np.zeros(n, dtype=dtype)
    return wrap_array(data, backend)
Пример #9
0
def to_device(array, backend='cython'):
    """Copy a numpy ``array`` to the device for the given backend.

    For 'cython' the host array is used directly; 'opencl'/'cuda'
    transfer it to the GPU.  The result is wrapped for the backend.

    Raises:
        ValueError: for an unrecognized backend.  (Previously an
        unknown backend fell through with ``out`` unbound and crashed
        with a confusing NameError.)
    """
    if backend == 'cython':
        # No device: host memory is used as-is.
        out = array
    elif backend == 'opencl':
        import pyopencl.array as gpuarray
        from .opencl import get_queue
        out = gpuarray.to_device(get_queue(), array)
    elif backend == 'cuda':
        import pycuda.gpuarray as gpuarray
        out = gpuarray.to_gpu(array)
    else:
        raise ValueError("Unsupported backend: %s" % backend)
    return wrap_array(out, backend)
Пример #10
0
    def __init__(self, n, k=8, leaf_size=32):
        """Set up an (unbuilt) k-ary tree over ``n`` particles.

        Parameters:
            n: number of particles the tree will index.
            k: branching factor of the tree.
            leaf_size: maximum number of particles per leaf.
        """
        self.ctx = get_context()
        self.queue = get_queue()
        self.main_helper = get_helper(os.path.join('tree', 'tree.mako'))

        self.initialized = False
        self.preamble = ""
        self.leaf_size = leaf_size
        self.k = k
        self.n = n
        # Particles start unsorted; a later sort step may flip this.
        # (A duplicate `self.sorted = False` assignment was removed.)
        self.sorted = False
        self.depth = 0

        # Per-node index-function metadata, filled in by
        # set_index_function_info() below.
        self.index_function_args = []
        self.index_function_arg_ctypes = []
        self.index_function_arg_dtypes = []
        self.index_function_consts = []
        self.index_function_const_ctypes = []
        self.index_code = ""

        self.set_index_function_info()
Пример #11
0
def wrap_array(arr, backend):
    """Wrap ``arr`` (a numpy array or an existing device array) in an
    Array container for the given backend.

    Numpy input is copied to the device for 'opencl'/'cuda' (floats are
    coerced to the configured precision first); an already-on-device
    pyopencl/pycuda array is adopted as-is.
    """
    wrapped_array = Array(arr.dtype, allocate=False, backend=backend)
    if isinstance(arr, np.ndarray):
        wrapped_array.data = arr
        if backend == 'opencl' or backend == 'cuda':
            # Coerce floating data to the precision chosen in the global
            # config before transferring to the device.
            use_double = get_config().use_double
            _dtype = np.float64 if use_double else np.float32
            if np.issubdtype(arr.dtype, np.floating):
                wrapped_array.dtype = _dtype
                wrapped_array.data = arr.astype(_dtype)
            q = None
            if backend == 'opencl':
                from .opencl import get_queue
                from pyopencl.array import to_device
                q = get_queue()
                # NOTE(review): `arr is not None` is always true here
                # (isinstance above already excluded None) — confirm.
                if arr is not None:
                    dev_ary = to_device(q, wrapped_array.data)
                    wrapped_array.set_data(dev_ary)
            elif backend == 'cuda':
                from .cuda import set_context
                set_context()
                from pycuda.gpuarray import to_gpu
                if arr is not None:
                    dev_ary = to_gpu(wrapped_array.data)
                    wrapped_array.set_data(dev_ary)
        else:
            # Host backend: the numpy array itself is the data.
            wrapped_array.set_data(wrapped_array.data)
    elif backend == 'opencl':
        import pyopencl.array as gpuarray
        # Already a device array: adopt without copying.
        if isinstance(arr, gpuarray.Array):
            wrapped_array.set_data(arr)
    elif backend == 'cuda':
        import pycuda.gpuarray as gpuarray
        if isinstance(arr, gpuarray.GPUArray):
            wrapped_array.set_data(arr)
    return wrapped_array