def _get_remove_particles_bool_kernels(self):
    """Build the kernels used to compact indices after flagged removals.

    Returns
    -------
    tuple
        ``(remove_knl, stride_knl)``: a ``Scan`` over the ``if_remove``
        flags that records the indices of the entries that are kept, and
        an ``Elementwise`` kernel that expands every index into ``stride``
        consecutive indices.
    """
    # Scan input: the removal flag of element i.
    # NOTE(review): the index arithmetic in the output expression assumes
    # the flags are 0/1 -- confirm against the callers.
    @annotate(i='int', if_remove='gintp', return_='int')
    def remove_input_expr(i, if_remove):
        return if_remove[i]

    # Scan output. Per compyle's Scan convention, `item` is the inclusive
    # running sum of the flags at i and `last_item` is the sum over all
    # elements.
    @annotate(int='i, item, last_item, num_particles', gintp='if_remove, num_removed_particles', new_indices='guintp')
    def remove_output_expr(i, item, last_item, if_remove, new_indices, num_removed_particles, num_particles):
        if not if_remove[i]:
            # i is kept, so `item` counts only the flagged entries before
            # it; subtracting gives the slot of i in the compacted array.
            new_indices[i - item] = i
        if i == num_particles - 1:
            # Only the last element stores the total of the flags.
            num_removed_particles[0] = last_item

    remove_knl = Scan(remove_input_expr, remove_output_expr, 'a+b', dtype=np.int32, backend=self.backend)

    # Expand each entry of `indices` into `stride` consecutive entries of
    # `new_indices`: slot i * stride + j receives indices[i] * stride + j.
    @annotate(int='i, size, stride', guintp='indices, new_indices')
    def stride_knl_elwise(i, indices, new_indices, size, stride):
        tmp_idx, j_s = declare('unsigned int', 2)
        for j_s in range(stride):
            tmp_idx = i * stride + j_s
            # Skip writes at or beyond `size`.
            if tmp_idx < size:
                new_indices[tmp_idx] = indices[i] * stride + j_s

    stride_knl = Elementwise(stride_knl_elwise, backend=self.backend)

    return remove_knl, stride_knl
def setup_kernels(self):
    """Create the neighbor-search kernels and attach them to the instance.

    Sets ``find_neighbor_lengths`` and ``find_neighbors`` (both
    ``Elementwise``) and ``scan_start_indices`` (an ``"a+b"`` ``Scan`` with
    ``np.int32`` dtype), all built for ``self.backend``.
    """
    target_backend = self.backend

    # neighbor kernels
    self.find_neighbor_lengths = Elementwise(
        find_neighbor_lengths_knl,
        backend=target_backend,
    )
    self.find_neighbors = Elementwise(
        find_neighbors_knl,
        backend=target_backend,
    )
    self.scan_start_indices = Scan(
        input=input_start_indices,
        output=output_start_indices,
        scan_expr="a+b",
        dtype=np.int32,
        backend=target_backend,
    )
def __init__(self, x, y, h, xmax, ymax, backend=None):
    """Initialise the parent class, then build the sort kernels.

    All arguments are forwarded unchanged to the parent ``__init__``.
    Afterwards ``max_bits`` is set to ``ceil(log2(self.max_key))`` and the
    ``fill_keys``, ``fill_bin_counts`` and ``scan_keys`` kernels are
    created for ``self.backend``.
    """
    super().__init__(x, y, h, xmax, ymax, backend=backend)

    key_log2 = np.log2(self.max_key)
    self.max_bits = np.ceil(key_log2)

    # Read the backend from the instance (not the argument), as the
    # kernels must match whatever the parent initialiser stored.
    knl_backend = self.backend

    # sort kernels
    self.fill_keys = Elementwise(fill_keys, backend=knl_backend)
    self.fill_bin_counts = Elementwise(fill_bin_counts, backend=knl_backend)
    self.scan_keys = Scan(
        input=input_scan_keys,
        output=output_scan_keys,
        scan_expr="a+b",
        dtype=np.int32,
        backend=knl_backend,
    )
def __init__(self, x, y, h, xmax, ymax, backend=None):
    """Store the inputs, derive the key bounds and build neighbor kernels.

    Parameters
    ----------
    x, y : array-like exposing ``.length``
        Stored as ``self.x`` / ``self.y``; ``x.length`` becomes
        ``num_particles``.
    h : number
        Divisor used to turn ``xmax`` / ``ymax`` into integer cell maxima.
    xmax, ymax : number
        Upper bounds; ``floor(xmax / h)`` and ``floor(ymax / h)`` give the
        largest cell index along each axis.
    backend : optional
        Passed to every kernel constructed here.
    """
    self.backend = backend
    self.num_particles = x.length
    self.x = x
    self.y = y
    self.h = h

    cell_max = np.array(
        [floor(xmax / h), floor(ymax / h)],
        dtype=np.int32,
    )
    p_max, q_top = cell_max[0], cell_max[1]
    q_count = 1 + q_top
    self.max_key = 1 + flatten(p_max, q_top, q_count)
    self.qmax = q_count

    # neighbor kernels
    self.find_neighbor_lengths = Elementwise(
        find_neighbor_lengths_knl,
        backend=self.backend,
    )
    self.find_neighbors = Elementwise(
        find_neighbors_knl,
        backend=self.backend,
    )
    self.scan_start_indices = Scan(
        input=input_start_indices,
        output=output_start_indices,
        scan_expr="a+b",
        dtype=np.int32,
        backend=self.backend,
    )

    self.init_arrays()
def _get_align_kernel_with_strides(self):
    """Build the ``Scan`` that computes a tag-ordered index map with strides.

    The returned kernel fills ``new_indices`` so that entries whose
    ``tag_arr`` value is zero come first and all others follow, each group
    keeping its original relative order. Every element contributes
    ``stride`` consecutive index entries.

    Returns
    -------
    Scan
        The ``'a+b'`` scan built from the two expressions below.
    """
    # Scan input: 1 for elements whose tag is zero, 0 otherwise.
    @annotate(i='int', tag_arr='gintp', return_='int')
    def align_input_expr(i, tag_arr):
        return tag_arr[i] == 0

    # Scan output. Per compyle's Scan convention, `prev_item` is the
    # running sum before element i (here: zero-tag elements before i) and
    # `last_item` is the sum over all elements (total zero-tag elements).
    # NOTE(review): annotated with return_='int' although the body has no
    # return statement; `item` and `num_particles` are unused -- confirm
    # this is intended.
    @annotate(int='i, item, prev_item, last_item, stride, num_particles', gintp='tag_arr, new_indices', return_='int')
    def align_output_expr(i, item, prev_item, last_item, tag_arr, new_indices, num_particles, stride):
        t, idx, j_s = declare('int', 3)
        # i - prev_item counts the non-zero-tag elements before i, so `t`
        # is the slot after the block of all zero-tag elements.
        t = last_item + i - prev_item
        idx = t if tag_arr[i] else prev_item
        # Expand the destination/source pair into `stride` entries.
        for j_s in range(stride):
            new_indices[stride * idx + j_s] = stride * i + j_s

    align_particles_knl = Scan(align_input_expr, align_output_expr, 'a+b', dtype=np.int32, backend=self.backend)

    return align_particles_knl
def _get_align_kernel_without_strides(self):
    """Build the ``Scan`` that computes a tag-ordered index map.

    The returned kernel fills ``new_indices`` so that entries whose
    ``tag_arr`` value is zero come first and all others follow, each group
    keeping its original relative order. It also stores the number of
    zero-tag entries in ``num_real_particles[0]``.

    Returns
    -------
    Scan
        The ``'a+b'`` scan built from the two expressions below.
    """
    # Scan input: 1 for elements whose tag is zero, 0 otherwise.
    # NOTE(review): tag 0 presumably marks "real" particles, judging by the
    # name `num_real_particles` -- confirm.
    @annotate(i='int', tag_arr='gintp', return_='int')
    def align_input_expr(i, tag_arr):
        return tag_arr[i] == 0

    # Scan output. Per compyle's Scan convention, `prev_item` is the
    # running sum before element i (here: zero-tag elements before i) and
    # `last_item` is the sum over all elements (total zero-tag elements).
    @annotate(int='i, item, prev_item, last_item, num_particles', gintp='tag_arr, new_indices, num_real_particles')
    def align_output_expr(i, item, prev_item, last_item, tag_arr, new_indices, num_particles, num_real_particles):
        t, idx = declare('int', 2)
        # i - prev_item counts the non-zero-tag elements before i, so `t`
        # is the slot after the block of all zero-tag elements.
        t = last_item + i - prev_item
        idx = t if tag_arr[i] else prev_item
        new_indices[idx] = i
        if i == num_particles - 1:
            # Only the last element stores the zero-tag total.
            num_real_particles[0] = last_item

    align_particles_knl = Scan(align_input_expr, align_output_expr, 'a+b', dtype=np.int32, backend=self.backend)

    return align_particles_knl
def _get_ghosts_scan_kernel(self):
    """Build the ``Scan`` that enumerates ghost entries near the bounds.

    The input expression counts how many ghost entries element ``i``
    needs; the output expression writes, for each of them, a direction
    mask into ``masks`` and the source index ``i`` into ``indices``.

    Returns
    -------
    Scan
        The ``'a+b'`` scan (``np.int32``) built for ``self.backend``.
    """
    # Scan input: number of ghost entries required by element i.
    # Both nested functions use a bare @annotate, i.e. no explicit
    # argument types are given here.
    @annotate
    def inp_fill_ghosts(i, periodic_in_x, periodic_in_y, periodic_in_z, x, y, z, xmin, ymin, zmin, xmax, ymax, zmax, cell_size):
        x_copies, y_copies, z_copies = declare('int', 3)

        # Each axis starts with the element itself ...
        x_copies = 1
        y_copies = 1
        z_copies = 1

        # ... and gains one image per bound that lies within `cell_size`,
        # but only along axes flagged as periodic.
        if periodic_in_x:
            if (x[i] - xmin) <= cell_size:
                x_copies += 1
            if (xmax - x[i]) <= cell_size:
                x_copies += 1

        if periodic_in_y:
            if (y[i] - ymin) <= cell_size:
                y_copies += 1
            if (ymax - y[i]) <= cell_size:
                y_copies += 1

        if periodic_in_z:
            if (z[i] - zmin) <= cell_size:
                z_copies += 1
            if (zmax - z[i]) <= cell_size:
                z_copies += 1

        # All per-axis combinations, minus the element itself.
        return x_copies * y_copies * z_copies - 1

    # Scan output: writes one (mask, index) pair per ghost entry of i.
    # Per compyle's Scan convention, `prev_item` is the running sum before
    # element i, used here as the first output slot for element i.
    @annotate
    def out_fill_ghosts(i, item, prev_item, periodic_in_x, periodic_in_y, periodic_in_z, x, y, z, xmin, ymin, zmin, xmax, ymax, zmax, cell_size, masks, indices):
        xleft, yleft, zleft = declare('int', 3)
        xright, yright, zright = declare('int', 3)

        # 0 means "no image in this direction".
        xleft = 0
        yleft = 0
        zleft = 0

        xright = 0
        yright = 0
        zright = 0

        # Near the lower bound -> offset +1; near the upper bound ->
        # offset -1.
        # NOTE(review): presumably the offset is the direction in which the
        # ghost copy is shifted by the domain length -- confirm where the
        # masks are consumed.
        if periodic_in_x:
            if (x[i] - xmin) <= cell_size:
                xright = 1
            if (xmax - x[i]) <= cell_size:
                xleft = -1

        if periodic_in_y:
            if (y[i] - ymin) <= cell_size:
                yright = 1
            if (ymax - y[i]) <= cell_size:
                yleft = -1

        if periodic_in_z:
            if (z[i] - zmin) <= cell_size:
                zright = 1
            if (zmax - z[i]) <= cell_size:
                zleft = -1

        xp, yp, zp = declare('int', 3)
        idx, mask = declare('int', 2)

        idx = prev_item

        # Visit every offset triple in {-1, 0, 1}^3. A non-zero offset is
        # skipped unless it matches one of the offsets enabled above.
        for xp in range(-1, 2):
            if xp != 0 and ((xleft == 0 and xright == 0) or
                            (xp != xleft and xp != xright)):
                continue
            for yp in range(-1, 2):
                if yp != 0 and ((yleft == 0 and yright == 0) or
                                (yp != yleft and yp != yright)):
                    continue
                for zp in range(-1, 2):
                    if zp != 0 and ((zleft == 0 and zright == 0) or
                                    (zp != zleft and zp != zright)):
                        continue
                    # (0, 0, 0) is the element itself, not a ghost.
                    if xp == 0 and yp == 0 and zp == 0:
                        continue
                    # Encode the three offsets as base-3 digits (0..26).
                    mask = (xp + 1) * 9 + (yp + 1) * 3 + (zp + 1)
                    masks[idx] = mask
                    indices[idx] = i
                    idx += 1

    return Scan(inp_fill_ghosts, out_fill_ghosts, 'a+b', dtype=np.int32, backend=self.backend)
def get_scan(inp_f, out_f, dtype, backend):
    """Return a ``Scan`` built from the given input/output expressions.

    Parameters
    ----------
    inp_f : callable
        Passed to ``Scan`` as ``input``.
    out_f : callable
        Passed to ``Scan`` as ``output``.
    dtype : data type
        Passed to ``Scan`` as ``dtype``.
    backend : str or None
        Passed to ``Scan`` as ``backend``.

    No ``scan_expr`` is supplied, so ``Scan`` uses its own default.
    """
    scan_options = {
        'input': inp_f,
        'output': out_f,
        'dtype': dtype,
        'backend': backend,
    }
    return Scan(**scan_options)