示例#1
0
文件: group.py 项目: hagisgit/numcl
 def setup_sort(self, index):
     """Prepare the group for sorting by the values of ``index``.

     ``index`` must be a CLVar of CLUInt type with dim=1, e.g.
     shape=(length,). It is added to the group when it is not a member
     yet. The whole group will be sorted according to the index values.

     Input:
     index -- the index to sort the group."""
     if index not in self._vars:
         self.add(index)

     # Variables used by the sort kernels. The grid is sized from
     # max_reduce(index) + 1; the cell offset starts empty and is brought
     # to the group length by add().
     self._sortindex = index
     grid_size = int(max_reduce(self._sortindex)) + 1
     self._grid = CLUInt(grid_size)
     self._celloffset = CLUInt()
     self.add(self._celloffset)

     # Fill-grid kernel: substitute the placeholder names in the template.
     kernel_src = Syncgroup._fgrid_kern_src
     for placeholder, clvar in (
         ('$index', self._sortindex),
         ('$celloffset', self._celloffset),
         ('$grid', self._grid),
     ):
         kernel_src = kernel_src.replace(placeholder, clvar.name)
     self._fillgridkern = CLKernel(
         src=kernel_src,
         varlist=[self._sortindex, self._grid, self._celloffset],
         name='fgridkern',
     )

     # Copy / copy-back kernels for every group member.
     self._setup_sortcopy()

     # Kernels that zero the grid and the cell offsets.
     self._reset_grid_kern = CLTemplateKernel(
         src=Syncgroup._reset_grid_str, name='reset_grid')
     self._reset_grid_kern.grid = self._grid
     self._reset_grid_kern.compile()

     self._reset_co_kern = CLTemplateKernel(
         src=Syncgroup._reset_co_str, name='reset_co')
     self._reset_co_kern.celloffset = self._celloffset
     self._reset_co_kern.compile()

     # From now on add() rebuilds the sort-copy kernels as well.
     self._sort_enabeld = True
示例#2
0
文件: tools.py 项目: hagisgit/SLIC
def gradient_field(data):
    """Run the ``_GRADIENT_SRC`` kernel over ``data`` and return its output.

    ``data`` is copied into a numpy array and given a trailing axis of
    length 1, so it needs at least three axes (an IndexError is raised
    otherwise). The result is the host value of a CLReal4 variable
    allocated with the same shape as the uploaded volume.
    """
    voxels = np.array(data)
    # Reshape in place (no view is created) to (d0, d1, d2, 1).
    leading = tuple(voxels.shape[axis] for axis in range(3))
    voxels.shape = leading + (1,)

    volume_var = CLReal()
    volume_var.value = voxels
    gradient_var = CLReal4(*volume_var.shape)

    kernel = CLTemplateKernel(src=_GRADIENT_SRC)
    kernel.volume = volume_var
    kernel.gradient = gradient_var
    kernel.compile()
    kernel()
    return gradient_var.value
示例#3
0
文件: alg.py 项目: hagisgit/numcl
 def __init__(self, posarg, group=None, h=1.0, maxneighs=100, index=None, indvec=None, neigharray=None):
     """Initialize spatial fixed radius neighbor search. Neighbors are searched inside a fixed radius
     specified by h, which defines the cell size in which the spatial domain is divided during searching.
     The Algorithm uses Z-order indexing and parallel counting sort (see Syncgroup.sort() and ZIndex for
     details).

     Input:
     posarg -- the positional argument, must be a 3d vector, e.g. CLFloat4, CLDouble4 or CLReal4.
     group -- the Syncgroup posarg belongs to, if not provided an internal group is created
     (self._group).
     h -- cell size for the neighbor search. radius = h/2.
          NOTE(review): the kernel compares distances against the ZIndex
          ``_h`` variable directly -- confirm the radius statement.
     maxneighs -- maximum neighbor count per item.
     index -- Z-order index. --> see ZIndex
     indvec -- spatial index --> see ZIndex
     neigharray -- array containing neighbor indices; it is reshaped to
                  (len(index), maxneighs). If not specified an internal CLInt
                  array is created. Either way it is available as self.neighbors.

     Raises:
     ValueError -- if posarg is not a member of group.
     """
     # Identity test rather than truthiness: only a missing group triggers
     # the creation of an internal one.
     if group is None:
         group = Syncgroup(posarg)
     if not (posarg in group._vars):
         raise ValueError("posarg must be member of group.")
     self._posarg = posarg
     self._group = group
     self._maxneighs = CLScalar(maxneighs)
     self._indexkernel = ZIndex(posarg=posarg, h=h, index=index, indvec=indvec)
     self._group.add(self._indexkernel._indvec)
     self._group.setup_sort(index=self._indexkernel._index)

     n_items = self._indexkernel._index._value.size
     # `is not None` instead of truthiness: a caller-supplied array that is
     # still empty must be adopted (and reshaped), not silently replaced.
     if neigharray is not None:
         neigharray.shape = (n_items, maxneighs)
         self._neighbors = neigharray
     else:
         self._neighbors = CLInt(n_items, maxneighs)

     self._grid_length = CLScalar(len(self._group._grid))  # , name='__grid_length')
     self._posarg_length = CLScalar(len(self._posarg))  # , name='__posarg_length')

     # Order matters: "$posarg_length" / "$grid_length" must be substituted
     # before their prefixes "$posarg" / "$grid".
     src = Neighsearch._find_kern_str
     for placeholder, clvar in (
         ("$posarg_length", self._posarg_length),
         ("$grid_length", self._grid_length),
         ("$posarg", self._posarg),
         ("$neighbors", self._neighbors),
         ("$ind4", self._indexkernel._indvec),
         ("$grid", self._group._grid),
         ("$maxneighs", self._maxneighs),
         ("$h", self._indexkernel._h),
     ):
         src = src.replace(placeholder, clvar.name)
     varlist = [
         self._posarg_length,
         self._grid_length,
         self._posarg,
         self._neighbors,
         self._indexkernel._indvec,
         self._group._grid,
         self._maxneighs,
         self._indexkernel._h,
     ]
     self._findkern = CLKernel(pre=ZIndex._il3_16_src, varlist=varlist, src=src, name="find_neighbors")
     # Kernel that resets every neighbor slot before a new search.
     self._reset_kern = CLTemplateKernel(src=Neighsearch._reset_kern_str, name="reset_neighbors")
     self._reset_kern.neighbors = self._neighbors
     self._reset_kern.maxneighs = self._maxneighs
     self._reset_kern.compile()
示例#4
0
 def __init__(self, pts, max_depth=None):
     """Build the tree for ``pts`` and compile the nearest-neighbor kernel.

     Input:
     pts -- numpy ndarray with shape (length, 4).
     max_depth -- value emitted as the MAX_DEPTH define of the kernel
                  (truncated to int). Defaults to log2(len(pts)) + 2.

     Raises:
     ValueError -- if pts is not 2-dimensional with 4 columns.
     """
     # Reject the input when EITHER condition fails. The former `and`
     # accepted (length, k != 4) arrays and raised IndexError for 1-d input.
     if len(pts.shape) != 2 or pts.shape[1] != 4:
         raise ValueError('pts must be a numpy ndarray with shape (length,4).')
     if max_depth is None:
         max_depth = math.log(len(pts), 2) + 2

     # Points are uploaded flattened, as a single column.
     self._points = CLReal()
     parr = np.array(pts).flatten()
     parr.shape = (parr.shape[0], 1)
     self._points.value = parr

     # kd_c.get_tree yields the tree arrays; each becomes one column.
     data = []
     for arr in kd_c.get_tree(pts):
         column = arr.flatten()
         column.shape = (column.shape[0], 1)
         data.append(column)

     self._id = CLInt()
     self._parent = CLInt()
     self._cdim = CLInt()
     self._pt = CLInt()
     self._lo = CLInt()
     self._hi = CLInt()
     self._vmax = CLReal()
     self._vmin = CLReal()
     self._cval = CLReal()
     self._offset = CLScalar(0)
     # Positional layout of `data`: id, parent, cdim, pt, lo, hi, vmin, vmax, cval.
     self._id.value = data[0]
     self._parent.value = data[1]
     self._cdim.value = data[2]
     self._pt.value = data[3]
     self._lo.value = data[4]
     self._hi.value = data[5]
     self._vmin.value = data[6]
     self._vmax.value = data[7]
     self._cval.value = data[8]

     # Buffers for the nearest-neighbor search; they start out empty.
     self._query_points = CLReal4()
     self._neigh_i = CLInt()
     self._neigh_d = CLReal()

     # Prepare the kernel and bind every variable by its template name.
     pre = "#define MAX_DEPTH " + str(int(max_depth))
     self._get_nn_krn = CLTemplateKernel(pre=pre, src=_GET_NN_SRC)
     self._get_nn_krn.query_points = self._query_points
     self._get_nn_krn.points = self._points
     self._get_nn_krn.id = self._id
     self._get_nn_krn.parent = self._parent
     self._get_nn_krn.cdim = self._cdim
     self._get_nn_krn.pt = self._pt
     self._get_nn_krn.lo = self._lo
     self._get_nn_krn.hi = self._hi
     self._get_nn_krn.vmin = self._vmin
     self._get_nn_krn.vmax = self._vmax
     self._get_nn_krn.cval = self._cval
     self._get_nn_krn.neigh_i = self._neigh_i
     self._get_nn_krn.neigh_d = self._neigh_d
     self._get_nn_krn.offset = self._offset
     self._get_nn_krn.compile()
示例#5
0
class KD_tree(object):
    """Tree over a fixed point set with a kernel-based neighbor query.

    The tree arrays come from ``kd_c.get_tree`` and are stored in CL
    variables; queries are run by the ``_GET_NN_SRC`` template kernel.
    """

    def __init__(self, pts, max_depth=None):
        """Build the tree for ``pts`` and compile the nearest-neighbor kernel.

        Input:
        pts -- numpy ndarray with shape (length, 4).
        max_depth -- value emitted as the MAX_DEPTH define of the kernel
                     (truncated to int). Defaults to log2(len(pts)) + 2.

        Raises:
        ValueError -- if pts is not 2-dimensional with 4 columns.
        """
        # Reject the input when EITHER condition fails. The former `and`
        # accepted (length, k != 4) arrays and raised IndexError for 1-d input.
        if len(pts.shape) != 2 or pts.shape[1] != 4:
            raise ValueError('pts must be a numpy ndarray with shape (length,4).')
        if max_depth is None:
            max_depth = math.log(len(pts), 2) + 2

        # Points are uploaded flattened, as a single column.
        self._points = CLReal()
        parr = np.array(pts).flatten()
        parr.shape = (parr.shape[0], 1)
        self._points.value = parr

        # kd_c.get_tree yields the tree arrays; each becomes one column.
        data = []
        for arr in kd_c.get_tree(pts):
            column = arr.flatten()
            column.shape = (column.shape[0], 1)
            data.append(column)

        self._id = CLInt()
        self._parent = CLInt()
        self._cdim = CLInt()
        self._pt = CLInt()
        self._lo = CLInt()
        self._hi = CLInt()
        self._vmax = CLReal()
        self._vmin = CLReal()
        self._cval = CLReal()
        self._offset = CLScalar(0)
        # Positional layout of `data`: id, parent, cdim, pt, lo, hi, vmin, vmax, cval.
        self._id.value = data[0]
        self._parent.value = data[1]
        self._cdim.value = data[2]
        self._pt.value = data[3]
        self._lo.value = data[4]
        self._hi.value = data[5]
        self._vmin.value = data[6]
        self._vmax.value = data[7]
        self._cval.value = data[8]

        # Buffers for the nearest-neighbor search; they start out empty.
        self._query_points = CLReal4()
        self._neigh_i = CLInt()
        self._neigh_d = CLReal()

        # Prepare the kernel and bind every variable by its template name.
        pre = "#define MAX_DEPTH " + str(int(max_depth))
        self._get_nn_krn = CLTemplateKernel(pre=pre, src=_GET_NN_SRC)
        self._get_nn_krn.query_points = self._query_points
        self._get_nn_krn.points = self._points
        self._get_nn_krn.id = self._id
        self._get_nn_krn.parent = self._parent
        self._get_nn_krn.cdim = self._cdim
        self._get_nn_krn.pt = self._pt
        self._get_nn_krn.lo = self._lo
        self._get_nn_krn.hi = self._hi
        self._get_nn_krn.vmin = self._vmin
        self._get_nn_krn.vmax = self._vmax
        self._get_nn_krn.cval = self._cval
        self._get_nn_krn.neigh_i = self._neigh_i
        self._get_nn_krn.neigh_d = self._neigh_d
        self._get_nn_krn.offset = self._offset
        self._get_nn_krn.compile()

    def get_nn(self, pts, cl_ref=False, chunksize=100000):
        """Run the neighbor kernel for the query points ``pts`` in chunks.

        Input:
        pts -- query points; uploaded unchanged into the CLReal4 query buffer.
        cl_ref -- if true return the CL variables themselves, otherwise
                  their host values.
        chunksize -- number of work items per kernel launch.

        Returns the pair (neigh_i, neigh_d) -- presumably neighbor index and
        distance per query point; confirm against _GET_NN_SRC.
        """
        len_pts = len(pts)
        chunks = int(math.ceil(float(len_pts) / float(chunksize)))
        # Result buffers are reset to zeros before every query.
        self._neigh_i.value = np.zeros((len_pts, 1), np.int32)
        self._neigh_d.value = np.zeros((len_pts, 1), np.float64)
        self._query_points.value = pts
        for i in range(chunks):
            # Single pre-formatted argument: prints the same text under
            # Python 2 and Python 3.
            print('computing chunk %d' % i)
            self._offset.value = i * chunksize
            execsize = chunksize
            if i == chunks - 1:
                # The last chunk only covers the remaining points.
                execsize = len_pts - i * chunksize
            self._get_nn_krn.exec_with_size((execsize,))
            self._get_nn_krn.finish()
        if cl_ref:
            return self._neigh_i, self._neigh_d
        return self._neigh_i.value, self._neigh_d.value

    def get_data(self):
        """Return the host values of the tree arrays as a list, in the
        order id, parent, cdim, pt, lo, hi, vmin, vmax, cval."""
        return [
            self._id.value,
            self._parent.value,
            self._cdim.value,
            self._pt.value,
            self._lo.value,
            self._hi.value,
            self._vmin.value,
            self._vmax.value,
            self._cval.value,
        ]
示例#6
0
文件: group.py 项目: hagisgit/numcl
class Syncgroup(object):
    """Group of CL variables that are kept at the same length along one
    axis and can be sorted together by an index variable."""

    _fgrid_kern_src = """
int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);

uint z_index = $index[gid];

$celloffset[gid] = atomic_inc(&$grid[z_index]);
"""

    _csort_str = """
int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);
int gsz = get_global_size(0)*get_global_size(1)*get_global_size(2);

uint theindex = $index[gid];

int dest = $grid[theindex] - $celloffset[gid] - 1; 

if ((dest >= 0) && (dest < gsz)){
    
$copyblock
}
"""

    _reset_grid_str = """
int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);

$grid$[gid] = 0;
"""
    _reset_co_str = """
int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);

$celloffset$[gid] = 0;
"""

    def __init__(self, *args, **kwargs):
        """Create a new Syncgroup. All members of the group are automatically synced in length.
        If the 'axis' kwarg is provided, variables are synced along the specified axis.

        Input:
        args -- an arbitrary number of CLVar objects
        kwargs --> provide 'axis' keyword argument to specify the axis to keep in sync. Must be 0, 1 or 2.

        Raises:
        ValueError -- if a variable is not in the 'global' address space or
                      the axis is outside 0..2."""
        self._vars = list(args)
        self._vars_copy = []  # shadow copies, needed for sorting
        self._sort_enabeld = False  # switched on by setup_sort()
        for var in self._vars:
            if not var._addspc == 'global':
                raise ValueError('Only global variables can be synced.')
        ax = kwargs.pop('axis', 0)
        if not (0 <= ax < 3):
            raise ValueError('Axis can only be 0, 1 or 2.')
        # Store the requested axis; it used to be validated and then
        # discarded in favour of a hard-coded 0.
        self._axis = ax
        for var in self._vars:
            var._register_grp(self)
        self.sync(self._longest_var())

    def _longest_var(self):
        """Return the first member with the greatest positive length along
        the sync axis, or None if no member has a positive length."""
        longest = None
        maxlength = 0
        for var in self._vars:
            length = var._value.shape[self._axis]
            if length > maxlength:
                maxlength = length
                longest = var
        return longest

    def sync(self, caller):
        """Sync group to length of caller (along the specified axis).

        Every member and every sort copy other than caller is read back,
        resized in place and written again. A caller of None (no member
        with a positive length) leaves the group untouched."""
        if caller is None:
            return
        for var in self._vars + self._vars_copy:
            if var == caller:
                continue
            shp = list(var._value.shape)
            shp[self._axis] = caller._value.shape[self._axis]
            var._read_buffer()
            var._value.resize(shp)
            # dontsync: this write must not trigger another group sync.
            var.set_value(var._value, dontsync=True)

    def add(self, *args, **kwargs):
        """Add variable(s) to the group and re-sync all members to the
        longest one. Variables that are already members are skipped."""
        for var in args:
            if not (var in self._vars):
                var._register_grp(self)
                self._vars.append(var)
        # _longest_var() may return None; sync() then does nothing instead
        # of failing on an unbound name as before.
        self.sync(self._longest_var())
        if self._sort_enabeld:
            # The copy kernels list every member, so rebuild them.
            self._setup_sortcopy()

    def setup_sort(self, index):
        """Setup the sorting algorithm. Index must be a CLVar of CLUInt type with dim=1, e.g. shape=(length,).
        The whole Group will be sorted according to the index values.

        Input:
        index -- the index to sort the group."""
        if index not in self._vars:
            self.add(index)
        # Variables used by the sort kernels; the grid is sized from
        # max_reduce(index) + 1.
        self._sortindex = index
        self._grid = CLUInt(int(max_reduce(self._sortindex)) + 1)
        self._celloffset = CLUInt()
        self.add(self._celloffset)
        # Fill-grid kernel.
        src = Syncgroup._fgrid_kern_src
        src = src.replace('$index', self._sortindex.name)
        src = src.replace('$celloffset', self._celloffset.name)
        src = src.replace('$grid', self._grid.name)
        varlist = [self._sortindex, self._grid, self._celloffset]
        self._fillgridkern = CLKernel(src=src, varlist=varlist, name='fgridkern')
        self._setup_sortcopy()
        # Kernels that zero the grid and the cell offsets.
        self._reset_grid_kern = CLTemplateKernel(src=Syncgroup._reset_grid_str, name='reset_grid')
        self._reset_grid_kern.grid = self._grid
        self._reset_grid_kern.compile()
        self._reset_co_kern = CLTemplateKernel(src=Syncgroup._reset_co_str, name='reset_co')
        self._reset_co_kern.celloffset = self._celloffset
        self._reset_co_kern.compile()
        self._sort_enabeld = True

    def _setup_sortcopy(self):
        """Create one shadow copy per member and build the kernels that
        scatter members into the copies and copy them back."""
        self._vars_copy = [CLVar.var_like(var) for var in self._vars]
        varlist = self._vars + self._vars_copy + [self._grid]
        # One assignment line per member for each of the two kernels.
        copyblock = ''
        backcopyblock = ''
        for var, copyvar in zip(self._vars, self._vars_copy):
            copyblock += '    ' + copyvar.name + '[dest] = ' + var.name + '[gid];\n'
            backcopyblock += '    ' + var.name + '[gid] = ' + copyvar.name + '[gid];\n'
        src = Syncgroup._csort_str
        src = src.replace('$index', self._sortindex.name)
        src = src.replace('$celloffset', self._celloffset.name)
        src = src.replace('$grid', self._grid.name)
        src = src.replace('$copyblock', copyblock)
        self._sortcopykern = CLKernel(src=src, varlist=varlist, name='sortcopykern')
        backcopystr = """int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);\n\n"""
        backcopystr += backcopyblock
        self._backcopykern = CLKernel(src=backcopystr, varlist=varlist, name='backcopykern')

    #@timing
    def sort(self):
        """Sort the group. Sorting must be enabled before. See setup_sort().
        Error messages may be unhelpful otherwise, as correct sorting setup is not checked,
        when sort() is called (performance reasons)."""
        # Resize the grid (its size depends on the current index values),
        # then reset grid and cell offsets to 0.
        self._grid.set_shape_wo_read((int(max_reduce(self._sortindex)) + 1,))
        self._reset_grid_kern()
        self._reset_co_kern()
        # Fill grid.
        self._fillgridkern()
        # Scan grid.
        scan_uint(self._grid)  # scan on GPU
        # Scatter into the copies, then copy back into the members.
        self._sortcopykern()
        self._backcopykern()
示例#7
0
文件: alg.py 项目: hagisgit/numcl
class Neighsearch(object):
    """Fixed radius neighbor search on a cell grid, built on a ZIndex
    kernel and the counting sort of a Syncgroup."""

    _find_kern_pre = """
int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);
int gsz = get_global_size(0)*get_global_size(1)*get_global_size(2);

int xind = $ind4[gid].s0;
int yind = $ind4[gid].s1;
int zind = $ind4[gid].s2;
real4 this_pos = $posarg[gid];
int id, xc, yc, zc, start;
start = 0;
int counter = 0;

"""
    _find_kern_body = """
id = interleave3_16(xind + xc, yind + yc, zind + zc);

if ((id < $grid_length) && (id >= 0)){
    start = $grid[id] - 1;
    int len_cell = 0;
    if (id == 0){
        len_cell = start + 1;
    }else{
        len_cell = (start + 1) - $grid[id - 1];
    }
    if ((start >= 0) && (start < $posarg_length)){
        for (int i = 0; i < len_cell; i++){
            if (counter < $maxneighs){
                int other_id = start - i;
                if (other_id != gid){
                    real dist = length(this_pos - $posarg[other_id]);
                    if (dist < $h){
                         $neighbors[gid*$maxneighs + counter] = other_id;
                         counter++;
                    }
                }
            }
        }
    }
}


"""
    # The kernel source is the preamble followed by one copy of the body
    # for each of the 27 cell offsets in {-1, 0, 1}^3.
    _find_kern_str = ""
    _find_kern_str += _find_kern_pre
    for i in range(3):
        for j in range(3):
            for k in range(3):
                _find_kern_str += "xc = " + str(i - 1) + ";\n"
                _find_kern_str += "yc = " + str(j - 1) + ";\n"
                _find_kern_str += "zc = " + str(k - 1) + ";\n\n"
                _find_kern_str += _find_kern_body

    _reset_kern_str = """
int gid = get_global_id(0) + get_global_id(1)*get_global_size(0) + get_global_id(2)*get_global_size(0)*get_global_size(1);

$neighbors$[gid] = -1;

"""

    def __init__(self, posarg, group=None, h=1.0, maxneighs=100, index=None, indvec=None, neigharray=None):
        """Initialize spatial fixed radius neighbor search. Neighbors are searched inside a fixed radius
        specified by h, which defines the cell size in which the spatial domain is divided during searching.
        The Algorithm uses Z-order indexing and parallel counting sort (see Syncgroup.sort() and ZIndex for
        details).

        Input:
        posarg -- the positional argument, must be a 3d vector, e.g. CLFloat4, CLDouble4 or CLReal4.
        group -- the Syncgroup posarg belongs to, if not provided an internal group is created
        (self._group).
        h -- cell size for the neighbor search. radius = h/2.
             NOTE(review): the find kernel accepts a neighbor when its
             distance is below the ZIndex ``_h`` variable -- confirm the
             radius statement against ZIndex.
        maxneighs -- maximum neighbor count per item.
        index -- Z-order index. --> see ZIndex
        indvec -- spatial index --> see ZIndex
        neigharray -- array containing neighbor indices; it is reshaped to
                     (len(index), maxneighs). If not specified an internal CLInt
                     array is created. Either way it is available as self.neighbors.

        Raises:
        ValueError -- if posarg is not a member of group.
        """
        # Identity test rather than truthiness: only a missing group
        # triggers the creation of an internal one.
        if group is None:
            group = Syncgroup(posarg)
        if not (posarg in group._vars):
            raise ValueError("posarg must be member of group.")
        self._posarg = posarg
        self._group = group
        self._maxneighs = CLScalar(maxneighs)
        self._indexkernel = ZIndex(posarg=posarg, h=h, index=index, indvec=indvec)
        self._group.add(self._indexkernel._indvec)
        self._group.setup_sort(index=self._indexkernel._index)

        n_items = self._indexkernel._index._value.size
        # `is not None` instead of truthiness: a caller-supplied array that
        # is still empty must be adopted (and reshaped), not replaced.
        if neigharray is not None:
            neigharray.shape = (n_items, maxneighs)
            self._neighbors = neigharray
        else:
            self._neighbors = CLInt(n_items, maxneighs)

        self._grid_length = CLScalar(len(self._group._grid))  # , name='__grid_length')
        self._posarg_length = CLScalar(len(self._posarg))  # , name='__posarg_length')

        # Order matters: "$posarg_length" / "$grid_length" must be
        # substituted before their prefixes "$posarg" / "$grid".
        src = Neighsearch._find_kern_str
        for placeholder, clvar in (
            ("$posarg_length", self._posarg_length),
            ("$grid_length", self._grid_length),
            ("$posarg", self._posarg),
            ("$neighbors", self._neighbors),
            ("$ind4", self._indexkernel._indvec),
            ("$grid", self._group._grid),
            ("$maxneighs", self._maxneighs),
            ("$h", self._indexkernel._h),
        ):
            src = src.replace(placeholder, clvar.name)
        varlist = [
            self._posarg_length,
            self._grid_length,
            self._posarg,
            self._neighbors,
            self._indexkernel._indvec,
            self._group._grid,
            self._maxneighs,
            self._indexkernel._h,
        ]
        self._findkern = CLKernel(pre=ZIndex._il3_16_src, varlist=varlist, src=src, name="find_neighbors")
        # Kernel that sets every neighbor slot to -1 before a new search.
        self._reset_kern = CLTemplateKernel(src=Neighsearch._reset_kern_str, name="reset_neighbors")
        self._reset_kern.neighbors = self._neighbors
        self._reset_kern.maxneighs = self._maxneighs
        self._reset_kern.compile()

    @property
    def neighbors(self):
        """The CL array holding the neighbor indices."""
        return self._neighbors

    def search(self, h=None, maxneighs=None):
        """Recompute the index, sort the group and run the find kernel.

        Input:
        h -- forwarded to the index kernel call.
        maxneighs -- if given (and non-zero), becomes the new maximum
                     neighbor count and the neighbor array is resized to it.
        """
        self._indexkernel(h=h)
        n_items = self._indexkernel._index._value.size
        if maxneighs:
            self._maxneighs.value = maxneighs
            self._neighbors.set_shape_wo_read((n_items, maxneighs))
        elif not (n_items == self._neighbors.shape[0]):
            # The item count changed: resize, keeping the current limit.
            # This used to read the non-existent attribute `self.maxneighs`.
            self._neighbors.set_shape_wo_read((n_items, self._maxneighs.value))
        self._reset_kern()
        self._group.sort()
        # The sort may have resized the grid, so refresh both lengths.
        self._grid_length.value = len(self._group._grid)
        self._posarg_length.value = len(self._posarg)
        self._findkern()

    def info(self):
        """Return the info() of the underlying index kernel."""
        return self._indexkernel.info()