def setup_sort(self, index):
    """Prepare everything needed to sort this group by an index variable.

    The index must be a CLVar of CLUInt type with dim=1, e.g.
    shape=(length,). The whole group is sorted according to the values
    stored in it.

    Input:
        index -- the index variable that determines the sort order. It is
                 added to the group if it is not already a member.
    """
    # Make sure the index variable belongs to the group.
    if index not in self._vars:
        self.add(index)

    # Working variables: the sort index, a grid with (max index + 1)
    # entries and a cell offset variable that is added to the group.
    self._sortindex = index
    self._grid = CLUInt(int(max_reduce(self._sortindex)) + 1)
    self._celloffset = CLUInt()
    self.add(self._celloffset)

    # Fill-grid kernel: substitute the variable names for the placeholders
    # in the kernel source template, then build the kernel.
    fgrid_src = (
        Syncgroup._fgrid_kern_src
        .replace('$index', self._sortindex.name)
        .replace('$celloffset', self._celloffset.name)
        .replace('$grid', self._grid.name)
    )
    self._fillgridkern = CLKernel(
        src=fgrid_src,
        varlist=[self._sortindex, self._grid, self._celloffset],
        name='fgridkern',
    )

    self._setup_sortcopy()

    # Template kernel used by sort() to reset the grid.
    grid_reset = CLTemplateKernel(src=Syncgroup._reset_grid_str,
                                  name='reset_grid')
    self._reset_grid_kern = grid_reset
    grid_reset.grid = self._grid
    grid_reset.compile()

    # Template kernel used by sort() to reset the cell offset.
    co_reset = CLTemplateKernel(src=Syncgroup._reset_co_str,
                                name='reset_co')
    self._reset_co_kern = co_reset
    co_reset.celloffset = self._celloffset
    co_reset.compile()

    # NOTE(review): the attribute name is misspelled ("enabeld"); it is kept
    # unchanged because code outside this method may read it.
    self._sort_enabeld = True
def sort(self):
    """Sort the group according to the configured sort index.

    Sorting has to be enabled beforehand, see setup_sort(). For performance
    reasons this method does not check that the setup was done, so calling
    it without a prior setup_sort() may produce unhelpful error messages.
    """
    # The required grid size depends on the current index values, so the
    # grid is resized on every call before it is reset.
    grid_length = int(max_reduce(self._sortindex)) + 1
    self._grid.set_shape_wo_read((grid_length,))

    # Reset grid and celloffset to 0.
    self._reset_grid_kern()
    self._reset_co_kern()

    # Fill the grid, then scan it (the scan runs on the GPU).
    self._fillgridkern()
    scan_uint(self._grid)

    # Copy into sorted order, then copy the result back.
    self._sortcopykern()
    self._backcopykern()