def set_node_bounds(self):
    """Allocate the per-node bound properties and fill them bottom-up.

    Three node properties are (re)allocated on ``self``: ``node_xmin`` and
    ``node_xmax`` with the vector type obtained from ``self.c_type`` and
    ``self.dim``, and ``node_hmax`` with the scalar dtype of ``self.c_type``.
    A bottom-up tree kernel is then built from the pieces returned by
    ``_get_node_bound_kernel_parameters``, wrapped with ``profile_kernel``
    and invoked once with the particle and node buffers.
    """
    vector_data_t = get_vector_dtype(self.c_type, self.dim)
    # Computed once; it is used both for allocating ``node_hmax`` and for
    # converting ``radius_scale`` in the kernel arguments below.
    dtype = ctype_to_dtype(self.c_type)

    self.node_xmin = self.allocate_node_prop(vector_data_t)
    self.node_xmax = self.allocate_node_prop(vector_data_t)
    self.node_hmax = self.allocate_node_prop(dtype)

    params = _get_node_bound_kernel_parameters(
        self.dim, self.c_type, self.xvars
    )
    bounds_kernel = self.tree_bottom_up(
        params['args'],
        params['setup'],
        params['leaf_operation'],
        params['node_operation'],
        params['output_expr'],
        preamble=_get_macros_preamble(self.c_type, self.sorted, self.dim),
    )
    bounds_kernel = profile_kernel(
        bounds_kernel, 'set_node_bounds', backend='opencl'
    )

    pa_gpu = self.pa.gpu

    # Argument order: the tree itself, the particle ids, one device buffer
    # per entry of ``self.xvars``, then h, the radius scale and the three
    # output node properties.
    args = [self, self.pids.dev]
    args += [getattr(pa_gpu, v).dev for v in self.xvars]
    args += [
        pa_gpu.h.dev,
        dtype(self.radius_scale),
        self.node_xmin.dev,
        self.node_xmax.dev,
        self.node_hmax.dev,
    ]

    bounds_kernel(*args)
def _setup_call_data(self):
    """Fill ``self.py_calls`` and ``self.calls`` from the stored call data.

    ``self.py_data`` maps a method name to the destination names for which
    a Python stepper method exists; each one is stored in
    ``self.py_calls[method][dest]`` as ``(bound_method, dest)``.

    ``self.data`` maps a method name to ``{dest_name: (kernel, args)}``;
    each one is stored in ``self.calls[method][dest]`` as
    ``(profiled_kernel, argument_list, dest)`` where the argument list
    starts with ``[queue, None, None]`` followed by one zero-argument
    callable per kernel argument.
    """
    helper = self.acceleration_eval_helper
    array_map = helper._array_map
    queue = helper._queue
    calls = self.calls
    py_calls = self.py_calls
    steppers = self.object.steppers

    # Note: This is done to do some late binding. Instead of just directly
    # storing the dest.gpu.x, we compute it on the fly as the number of
    # particles and the actual buffer may change.
    def _device_buffer(dest_gpu, name):
        return getattr(dest_gpu, name).dev.data

    for method, dest_names in self.py_data.items():
        for dest_name in dest_names:
            bound = getattr(steppers[dest_name], method)
            dest = array_map[dest_name]
            py_calls[method][dest] = (bound, dest)

    for method, per_dest in self.data.items():
        for dest_name, (kernel, arg_names) in per_dest.items():
            dest = array_map[dest_name]
            # ``arg[2:]`` drops the first two characters of the argument
            # name (presumably a ``d_`` prefix -- confirm against the
            # code generator) to get the attribute name on ``dest.gpu``.
            lazy_args = [
                functools.partial(_device_buffer, dest.gpu, arg[2:])
                for arg in arg_names
            ]
            knl = getattr(self.program, kernel)
            knl = profile_kernel(knl, knl.function_name)
            calls[method][dest] = (knl, [queue, None, None] + lazy_args, dest)
def profile_kernel(knl, backend):
    """Return ``knl`` prepared for profiling on the given GPU ``backend``.

    For ``'cuda'`` the kernel is returned untouched.  For ``'opencl'`` the
    kernel is passed to ``compyle.opencl.profile_kernel`` together with its
    ``function_name``.  Any other backend raises ``RuntimeError``.
    """
    if backend == 'cuda':
        return knl

    if backend == 'opencl':
        # Aliased so the imported helper does not hide this function's own
        # name inside the body.
        from compyle.opencl import profile_kernel as _opencl_profile_kernel
        return _opencl_profile_kernel(knl, knl.function_name)

    raise RuntimeError('Unsupported GPU backend %s' % backend)
def find_neighbor_cids(self, tree_src):
    """Collect, per leaf, the ids of neighbouring leaves of ``tree_src``.

    Works in two passes over ``self._leaf_neighbor_operation``:

    1. count, for every leaf ``i``, the source leaves ``cid_src`` whose
       ``pbounds`` entry satisfies ``s0 < s1``;
    2. after running the neighbour-count prefix-sum kernel on the counts,
       write the matching ``cid_src`` values starting at ``cnt[i]``.

    Returns the tuple ``(counts, cids)`` of device arrays, where ``counts``
    holds the prefix-summed counts (``self.unique_cid_count + 1`` entries)
    and ``cids`` has as many entries as the last element of ``counts``.
    """
    counts = Array(np.uint32, n=self.unique_cid_count + 1,
                   backend='opencl')

    # Pass 1: per-leaf neighbour counts.
    count_knl = self._leaf_neighbor_operation(
        tree_src,
        args="uint2 *pbounds, int *cnt",
        setup="int count=0",
        operation=""" if (pbounds[cid_src].s0 < pbounds[cid_src].s1) count++; """,
        output_expr="cnt[i] = count;"
    )
    count_knl = profile_kernel(
        count_knl, 'find_neighbor_cid_count', backend='opencl'
    )
    count_knl(tree_src.pbounds.dev, counts.dev)

    # Turn the counts into offsets; the last entry is read back as the
    # total number of neighbour ids to allocate.
    prefix_sum = _get_neighbor_count_prefix_sum_kernel(self.ctx)
    prefix_sum(counts.dev)
    total = int(counts.dev[-1].get())

    cids = Array(np.uint32, n=total, backend='opencl')

    # Pass 2: same predicate as pass 1, now storing the ids.
    fill_knl = self._leaf_neighbor_operation(
        tree_src,
        args="uint2 *pbounds, int *cnt, int *neighbor_cids",
        setup="int offset=cnt[i];",
        operation=""" if (pbounds[cid_src].s0 < pbounds[cid_src].s1) neighbor_cids[offset++] = cid_src; """,
        output_expr=""
    )
    fill_knl = profile_kernel(
        fill_knl, 'find_neighbor_cids', backend='opencl'
    )
    fill_knl(tree_src.pbounds.dev, counts.dev, cids.dev)

    return counts, cids
def _setup_calls(self):
    """Convert the entries of ``self.data`` into a list of call records.

    Every entry of ``self.data`` is a dict whose ``'type'`` selects how it
    is converted:

    * ``'kernel'``: the kernel named by ``item['kernel']`` is looked up on
      ``self.program``, wrapped with ``profile_kernel`` and stored with its
      argument list (``[queue, None, None]``, one value per name in
      ``item['args']``, then the ``'kern'`` argument), the destination and
      source arrays and their indices.
    * ``'method'``: copied as is; for ``do_reduce`` the first argument (a
      group) is replaced by its equations that have a ``reduce`` attribute
      and the second (an array name) by the array itself.
    * ``'pre_post'``: copied as is.
    * ``'py_initialize'``: copied, with ``'dest'`` replaced by the array.
    * any type containing ``'iteration'``: stored with the group and the
      result of ``get_equations_with_converged``.

    Returns
    -------
    list of dict, one record per entry of ``self.data``.

    Raises
    ------
    RuntimeError
        If an entry has a missing or unrecognised ``'type'``.
    """
    calls = []
    prg = self.program
    array_index = self._array_index

    for item in self.data:
        # Named ``call_type`` so the ``type`` builtin is not shadowed.
        call_type = item.get('type')

        if call_type == 'kernel':
            kernel = item.get('kernel')
            method = profile_kernel(getattr(prg, kernel), self.backend)
            dest = item['dest']
            src = item.get('source', dest)
            args = [self._queue, None, None]
            args.extend(
                self._get_argument(arg, dest, src) for arg in item['args']
            )
            loop = item['loop']
            args.append(self._get_argument('kern', dest, src))
            info = dict(
                method=method,
                dest=self._array_map[dest],
                src=self._array_map[src],
                args=args,
                loop=loop,
                src_idx=array_index[src],
                dst_idx=array_index[dest],
                type='kernel',
            )
        elif call_type == 'method':
            info = dict(item)
            if info.get('method') == 'do_reduce':
                # ``dict(item)`` is a shallow copy, so work on a copy of
                # the argument list; rewriting it in place would corrupt
                # ``self.data`` and break any later call of this method.
                reduce_args = list(info['args'])
                grp = reduce_args[0]
                reduce_args[0] = [
                    x for x in grp.equations if hasattr(x, 'reduce')
                ]
                reduce_args[1] = self._array_map[reduce_args[1]]
                info['args'] = reduce_args
        elif call_type == 'pre_post':
            info = dict(item)
        elif call_type == 'py_initialize':
            info = dict(item)
            info['dest'] = self._array_map[item.get('dest')]
        elif call_type is not None and 'iteration' in call_type:
            # The ``None`` guard keeps an entry without a 'type' key from
            # failing in the ``in`` test instead of reaching the error
            # below.
            group = item['group']
            equations = get_equations_with_converged(group._orig_group)
            info = dict(type=call_type, equations=equations, group=group)
        else:
            raise RuntimeError('Unknown type %s' % call_type)

        calls.append(info)

    return calls
def get_simple_kernel(kernel_name, args, src, wgs, preamble=""):
    """Build a ``SimpleKernel`` on the current context and profile it.

    ``args``, ``src``, ``wgs``, ``kernel_name`` and ``preamble`` are
    forwarded to ``SimpleKernel``; the result is wrapped with
    ``profile_kernel`` under ``kernel_name`` for the OpenCL backend and
    returned.
    """
    simple = SimpleKernel(
        get_context(), args, src, wgs, kernel_name, preamble=preamble
    )
    return profile_kernel(simple, kernel_name, backend='opencl')
def get_elwise_kernel(kernel_name, args, src, preamble=""):
    """Build a PyOpenCL ``ElementwiseKernel`` and wrap it for profiling.

    ``args`` and ``src`` are the kernel's argument declaration and
    operation source; ``kernel_name`` is used both as the kernel's name
    and as the name passed to ``profile_kernel``.
    """
    context = get_context()
    # Function-scope import: pyopencl is only loaded when this is called.
    from pyopencl.elementwise import ElementwiseKernel

    elwise = ElementwiseKernel(
        context, args, src, kernel_name, preamble=preamble
    )
    return profile_kernel(elwise, kernel_name, backend='opencl')
def _setup_calls(self):
    """Convert the entries of ``self.data`` into a list of call records.

    Every entry of ``self.data`` is a dict whose ``'type'`` selects how it
    is converted:

    * ``'kernel'``: the kernel named by ``item['kernel']`` is looked up on
      ``self.program``, wrapped with ``profile_kernel`` and stored with its
      argument list (``[queue, None, None]``, one value per name in
      ``item['args']``, then the ``'kern'`` argument), the destination and
      source arrays, their indices and the optional ``start_idx`` /
      ``stop_idx``.
    * ``'method'``: copied as is; for ``do_reduce`` the first argument (a
      group) is replaced by its equations that have a ``reduce`` attribute
      and the second (an array name) by the array itself.
    * ``'pre_post'``: copied as is.
    * ``'py_initialize'``: copied, with ``'dest'`` replaced by the array.
    * any type containing ``'iteration'``: stored with the group and the
      result of ``get_equations_with_converged``.
    * ``'check_condition'``: copied with ``'jump'`` set to ``None`` and
      remembered until the matching ``'end_group'``.
    * ``'end_group'``: produces no record; if the group has a condition,
      the most recent pending ``check_condition`` record gets its
      ``'jump'`` filled in.

    Returns
    -------
    list of dict, one record per entry that produced one.

    Raises
    ------
    RuntimeError
        If an entry has a missing or unrecognised ``'type'``.
    """
    calls = []
    prg = self.program
    array_index = self._array_index

    # Track the condition and end_group to facilitate group conditions.
    condition_stack = []
    # Number of records appended so far, i.e. the index of the next one.
    count = 0

    for item in self.data:
        # Named ``call_type`` so the ``type`` builtin is not shadowed.
        call_type = item.get('type')
        info = {}

        if call_type == 'kernel':
            kernel = item.get('kernel')
            method = profile_kernel(getattr(prg, kernel), self.backend)
            dest = item['dest']
            src = item.get('source', dest)
            args = [self._queue, None, None]
            args.extend(
                self._get_argument(arg, dest, src) for arg in item['args']
            )
            loop = item['loop']
            args.append(self._get_argument('kern', dest, src))
            info = dict(
                method=method,
                dest=self._array_map[dest],
                src=self._array_map[src],
                args=args,
                loop=loop,
                src_idx=array_index[src],
                dst_idx=array_index[dest],
                start_idx=item.get('start_idx'),
                stop_idx=item.get('stop_idx'),
                type='kernel',
            )
        elif call_type == 'method':
            info = dict(item)
            if info.get('method') == 'do_reduce':
                # ``dict(item)`` is a shallow copy, so work on a copy of
                # the argument list; rewriting it in place would corrupt
                # ``self.data`` and break any later call of this method.
                reduce_args = list(info['args'])
                grp = reduce_args[0]
                reduce_args[0] = [
                    x for x in grp.equations if hasattr(x, 'reduce')
                ]
                reduce_args[1] = self._array_map[reduce_args[1]]
                info['args'] = reduce_args
        elif call_type == 'pre_post':
            info = dict(item)
        elif call_type == 'py_initialize':
            info = dict(item)
            info['dest'] = self._array_map[item.get('dest')]
        elif call_type == 'check_condition':
            info = dict(item)
            info['jump'] = None
            # The same dict is appended to ``calls`` below, so filling in
            # 'jump' at the matching end_group updates the stored record.
            condition_stack.append(info)
        elif call_type == 'end_group':
            group = item['group']
            if group.condition is not None:
                cond_info = condition_stack.pop()
                # count is the location of the next call.
                # We decrement it here as the counter is incremented
                # in the helper.
                cond_info['jump'] = count - 1
        elif call_type is not None and 'iteration' in call_type:
            # The ``None`` guard keeps an entry without a 'type' key from
            # failing in the ``in`` test instead of reaching the error
            # below.
            group = item['group']
            equations = get_equations_with_converged(group._orig_group)
            info = dict(type=call_type, equations=equations, group=group)
        else:
            raise RuntimeError('Unknown type %s' % call_type)

        # ``end_group`` entries leave ``info`` empty and add no record.
        if info:
            calls.append(info)
            count += 1

    return calls