Python OCLArray.from_array示例，gputools.OCLArray.from_array Python示例

示例#1

0

显示文件

def _convolve_sep2_numpy(data, hx, hy):
    """Numpy front end for ``_convolve_sep2_gpu``.

    ``hx``, ``hy`` and ``data`` are each cast to float32 and uploaded with
    ``OCLArray.from_array``; the value returned by ``_convolve_sep2_gpu`` is
    fetched back to the host with ``.get()``.
    """
    # Upload order is hx, hy, data (the generator is consumed left to right).
    kernel_x_g, kernel_y_g, image_g = (
        OCLArray.from_array(arr.astype(np.float32)) for arr in (hx, hy, data)
    )
    result_g = _convolve_sep2_gpu(image_g, kernel_x_g, kernel_y_g)
    return result_g.get()

示例#2

0

显示文件

    def run(self, data: np.ndarray):
        """Run ``self.n_iter`` deconvolution iterations on ``data``.

        Every iteration performs four steps on the GPU buffers:

        1. ``fft_convolve`` of the current estimate ``u_g`` with
           ``self.psf_g``, written to ``self.tmp_g``;
        2. ``_complex_divide_inplace(y_g, self.tmp_g)``;
        3. in-place ``fft_convolve`` of ``self.tmp_g`` with
           ``self.psfflip_f_g``;
        4. ``_complex_multiply_inplace(u_g, self.tmp_g)``.

        Previously only step 1 was inside the loop, so steps 2-4 ran a single
        time regardless of ``n_iter``.

        :param data: array whose shape must equal ``self.shape``
        :return: ``np.abs`` of the final estimate, as a numpy array
        :raises ValueError: if ``data.shape`` differs from ``self.shape``
        """
        if data.shape != self.shape:
            raise ValueError("data and h have to be same shape")

        # set up some gpu buffers: y_g holds the data, u_g the running estimate
        data64 = data.astype(np.complex64)
        y_g = OCLArray.from_array(data64)
        u_g = OCLArray.from_array(data64)

        for _ in range(self.n_iter):
            fft_convolve(u_g,
                         self.psf_g,
                         plan=self.plan,
                         res_g=self.tmp_g,
                         kernel_is_fft=True)

            # NOTE(review): presumably leaves the quotient in self.tmp_g,
            # since that buffer is what the next step consumes - confirm.
            _complex_divide_inplace(y_g, self.tmp_g)

            fft_convolve(self.tmp_g,
                         self.psfflip_f_g,
                         plan=self.plan,
                         inplace=True,
                         kernel_is_fft=True)

            _complex_multiply_inplace(u_g, self.tmp_g)

        # TODO: can abs be calculated on the gpu ?
        return np.abs(u_g.get())

示例#3

0

显示文件

文件： deconv_rl.py 项目： DerThorsten/gputools

def _deconv_rl_np(data, h, Niter = 10, ):
    """Numpy front end for ``_deconv_rl_gpu_conv``.

    ``data`` and ``h`` are converted to float32 (``copy=False``, so an
    existing float32 array is not duplicated on the host), uploaded as
    OCLArrays and passed on together with ``Niter``. The returned buffer is
    fetched with ``.get()``.
    """
    data_dev = OCLArray.from_array(data.astype(np.float32, copy = False))
    psf_dev = OCLArray.from_array(h.astype(np.float32, copy = False))
    return _deconv_rl_gpu_conv(data_dev, psf_dev, Niter).get()

示例#4

0

显示文件

文件： bpm_class.py 项目： maweigert/bpm

    def setup(self, size, units, lam=0.5, n0=1.0, use_fresnel_approx=False):
        """
            sets up the internal variables e.g. propagators etc...

            :param size:  the size of the geometry in pixels (Nx,Ny,Nz)
            :param units: the physical units of each voxel in microns (dx,dy,dz)
            :param lam: the wavelength of light in microns
            :param n0:  the refractive index of the surrounding media
            :param use_fresnel_approx:  if True, uses fresnel approximation for propagator

            All arguments are forwarded to ``Bpm3d_Base.setup``; afterwards the
            OpenCL program, the device copies of ``_H`` and of the two weight
            arrays, two zero-filled float32 buffers of length Nz and the
            reduction kernel are created.
        """
        Bpm3d_Base.setup(self, size, units, lam=lam, n0=n0, use_fresnel_approx=use_fresnel_approx)

        # setting up the gpu buffers and kernels
        self.program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

        Nx, Ny = self.size[:2]
        # Nz was previously never bound, so the OCLArray.zeros calls below
        # raised NameError.
        # NOTE(review): assumes self.size is (Nx, Ny, Nz) as the docstring
        # describes for ``size`` - confirm against Bpm3d_Base.setup.
        Nz = self.size[2]

        # NOTE(review): the result is not stored and the shape is empty; kept
        # as-is in case the call has side effects - confirm it is needed.
        plan = fft_plan(())
        self._H_g = OCLArray.from_array(self._H.astype(np.complex64))

        self.scatter_weights_g = OCLArray.from_array(self.scatter_weights.astype(np.float32))
        self.gfactor_weights_g = OCLArray.from_array(self.gfactor_weights.astype(np.float32))

        self.scatter_cross_sec_g = OCLArray.zeros(Nz, "float32")
        self.gfactor_g = OCLArray.zeros(Nz, "float32")

        # sum over i of weights[i] * |field[i] - (i==0)*plain|^2
        self.reduce_kernel = OCLReductionKernel(
            np.float32,
            neutral="0",
            reduce_expr="a+b",
            map_expr="weights[i]*cfloat_abs(field[i]-(i==0)*plain)*cfloat_abs(field[i]-(i==0)*plain)",
            arguments="__global cfloat_t *field, __global float * weights,cfloat_t plain",
        )

示例#5

0

显示文件

def _fft_convolve_numpy(data, h, plan = None,
                        kernel_is_fft = False,
                        kernel_is_fftshifted = False):
    """FFT-based convolution of two numpy arrays via ``_fft_convolve_gpu``.

    ``data`` and ``h`` must have identical shapes, otherwise ``ValueError``
    is raised. Unless ``kernel_is_fftshifted`` is set, ``h`` is passed
    through ``np.fft.fftshift`` on the host first. Both arrays are uploaded
    as complex64; ``plan`` and ``kernel_is_fft`` are forwarded unchanged.

    Returns the absolute value of the result as a numpy array.
    """
    # NOTE(review): the handle is unused here; the call is kept because it
    # may initialise the device - confirm.
    dev = get_device()

    if not data.shape == h.shape:
        raise ValueError("data and kernel must have same size! %s vs %s "%(str(data.shape),str(h.shape)))

    data_g = OCLArray.from_array(data.astype(np.complex64))

    kernel = h if kernel_is_fftshifted else np.fft.fftshift(h)
    h_g = OCLArray.from_array(kernel.astype(np.complex64))

    res_g = OCLArray.empty_like(data_g)
    _fft_convolve_gpu(data_g, h_g,
                      res_g = res_g,
                      plan = plan,
                      kernel_is_fft = kernel_is_fft)

    magnitude = abs(res_g.get())

    # drop the references to the device buffers before returning
    del data_g, h_g, res_g

    return magnitude

示例#6

0

显示文件

文件： convolve_sep.py 项目： maweigert/gputools

def test_3d():
    """Time ``convolve_sep3`` with numpy inputs and with OCLArray inputs.

    A 128^3 float32 volume with a single non-zero voxel is convolved
    ``Niter`` times for each input kind and the mean time per call is
    printed in milliseconds.

    Returns a tuple ``(out, out_g.get())``: the result of the last numpy
    call and the host copy of the last OCLArray call.
    """
    from time import time
    Niter = 10

    data = np.zeros((128,)*3,np.float32)

    data[30,30,30] = 1.
    hx = 1./5*np.ones(5)
    hy = 1./13*np.ones(13)
    # NOTE(review): 11 taps scaled by 1/13, so unlike hx and hy this kernel
    # does not sum to 1 - confirm whether that is intended.
    hz = 1./13*np.ones(11)

    t = time()
    for _ in range(Niter):
        out = convolve_sep3(data,hx,hy, hz)
    # single-argument print(...) behaves the same on Python 2 and Python 3
    print("time: %.3f ms"%(1000.*(time()-t)/Niter))

    data_g = OCLArray.from_array(data.astype(np.float32))
    hx_g = OCLArray.from_array(hx.astype(np.float32))
    hy_g = OCLArray.from_array(hy.astype(np.float32))
    hz_g = OCLArray.from_array(hz.astype(np.float32))

    t = time()
    for _ in range(Niter):
        out_g = convolve_sep3(data_g,hx_g,hy_g, hz_g)

    # the result is fetched before the clock is read
    # (presumably to wait for queued device work - confirm)
    out_g.get()
    print("time: %.3f ms"%(1000.*(time()-t)/Niter))

    return out, out_g.get()

示例#7

0

显示文件

文件： convolve_sep.py 项目： maweigert/gputools

def _convolve_sep2_numpy(data,hx,hy):
    """Upload ``hx``, ``hy`` and ``data`` as float32 OCLArrays, call
    ``_convolve_sep2_gpu`` and return the result as a numpy array."""

    def _to_device(arr):
        # cast on the host, then copy to the device
        return OCLArray.from_array(arr.astype(np.float32))

    hx_dev = _to_device(hx)
    hy_dev = _to_device(hy)
    data_dev = _to_device(data)

    out_dev = _convolve_sep2_gpu(data_dev, hx_dev, hy_dev)
    return out_dev.get()

示例#8

0

显示文件

def test_3d():
    """Benchmark ``convolve_sep3`` on host arrays and on device arrays.

    Prints the mean time per call (ms) for ten calls with numpy inputs and
    ten calls with OCLArray inputs, then returns
    ``(numpy_result, device_result.get())``.
    """
    from time import time

    n_runs = 10
    report = "time: %.3f ms"

    volume = np.zeros((128, 128, 128), np.float32)
    volume[30, 30, 30] = 1.

    hx = 1. / 5 * np.ones(5)
    hy = 1. / 13 * np.ones(13)
    hz = 1. / 13 * np.ones(11)

    # --- host inputs ---
    started = time()
    for _ in range(n_runs):
        out = convolve_sep3(volume, hx, hy, hz)
    print(report % (1000. * (time() - started) / n_runs))

    # --- device inputs ---
    volume_g, hx_g, hy_g, hz_g = (
        OCLArray.from_array(arr.astype(np.float32))
        for arr in (volume, hx, hy, hz)
    )

    started = time()
    for _ in range(n_runs):
        out_g = convolve_sep3(volume_g, hx_g, hy_g, hz_g)

    # fetch once before stopping the clock
    out_g.get()
    print(report % (1000. * (time() - started) / n_runs))

    return out, out_g.get()

示例#9

0

显示文件

def _convolve_np(data, h):
    """
    numpy variant: casts ``data`` and ``h`` to float32 (without copying when
    they already are float32), uploads both and returns the host copy of
    ``_convolve_buf``'s result
    """
    device_args = [
        OCLArray.from_array(arr.astype(np.float32, copy=False))
        for arr in (data, h)
    ]
    return _convolve_buf(*device_args).get()

示例#10

0

显示文件

文件： convolve.py 项目： shishu94/gputools

def _convolve_np(data, h):
    """
    numpy variant: both inputs are passed through
    ``np.require(..., np.float32, "C")`` before upload, then
    ``_convolve_buf`` is called and its result fetched with ``.get()``
    """
    data_host = np.require(data, np.float32, "C")
    data_dev = OCLArray.from_array(data_host)

    kernel_host = np.require(h, np.float32, "C")
    kernel_dev = OCLArray.from_array(kernel_host)

    result_dev = _convolve_buf(data_dev, kernel_dev)
    return result_dev.get()

示例#11

0

显示文件

文件： convolve.py 项目： DerThorsten/gputools

def _convolve_np(data, h):
    """
    numpy variant

    Uploads float32 views/copies of ``data`` and ``h`` and returns the host
    copy of what ``_convolve_buf`` produces.
    """

    def _upload(arr):
        return OCLArray.from_array(arr.astype(np.float32, copy = False))

    data_dev = _upload(data)
    kernel_dev = _upload(h)

    return _convolve_buf(data_dev, kernel_dev).get()

示例#12

0

显示文件

def _deconv_rl_np(
    data,
    h,
    Niter=10,
):
    """Call ``_deconv_rl_gpu_conv`` with numpy inputs.

    ``data`` and ``h`` are uploaded as float32 OCLArrays and ``Niter`` is
    forwarded; the resulting buffer is returned as a numpy array.
    """
    uploaded = tuple(
        OCLArray.from_array(arr.astype(np.float32, copy=False))
        for arr in (data, h))
    return _deconv_rl_gpu_conv(uploaded[0], uploaded[1], Niter).get()

示例#13

0

显示文件

文件： fftshift.py 项目： maweigert/gputools

def fftshift(arr_obj, axes = None, res_g = None, return_buffer = False):
    """
    gpu version of fftshift for numpy arrays or OCLArrays

    Parameters
    ----------
    arr_obj: numpy array or OCLArray (float32/complex64)
        the array to be fftshifted
    axes: iterable of int or None
        the axes over which to shift (like np.fft.fftshift)
        if None, all axes are taken
    res_g:
        if given, fills it with the result (has to be same shape and dtype as arr_obj)
        else internally creates a new one
    return_buffer: bool
        selects the return type, see below

    Returns
    -------
        if return_buffer, returns the result as OCLArray
        else returns the result as numpy array

    Raises
    ------
    NotImplementedError
        for an OCLArray whose dtype is not in DTYPE_KERNEL_NAMES, or if one
        of the selected axes has odd length
    ValueError
        if arr_obj is neither an OCLArray nor a numpy array

    """

    if isinstance(arr_obj, OCLArray):
        if arr_obj.dtype.type not in DTYPE_KERNEL_NAMES:
            raise NotImplementedError("only works for float32 or complex64")
    elif isinstance(arr_obj, np.ndarray):
        if np.iscomplexobj(arr_obj):
            arr_obj = OCLArray.from_array(arr_obj.astype(np.complex64,copy = False))
        else:
            arr_obj = OCLArray.from_array(arr_obj.astype(np.float32,copy = False))
    else:
        raise ValueError("unknown type (%s)"%(type(arr_obj)))

    # Resolved after the type check so unsupported inputs get the ValueError
    # above instead of an AttributeError on ``.ndim``. Materialised as a list
    # because the axes are iterated twice below.
    if axes is None:
        axes = list(range(arr_obj.ndim))
    else:
        axes = list(axes)

    if not np.all([arr_obj.shape[a]%2==0 for a in axes]):
        raise NotImplementedError("only works on axes of even dimensions")

    if res_g is None:
        res_g = OCLArray.empty_like(arr_obj)

    if len(axes) == 0:
        # Nothing to shift: without this copy a freshly allocated res_g would
        # be returned uninitialised.
        if res_g is not arr_obj:
            res_g.copy_buffer(arr_obj)
    else:
        # iterate over all axes
        # FIXME: this is still rather inefficient
        in_g = arr_obj
        for ax in axes:
            _fftshift_single(in_g, res_g, ax)
            in_g = res_g

    if return_buffer:
        return res_g
    else:
        return res_g.get()

示例#14

0

显示文件

def focus_field_lattice(shape,
                        units,
                        lam=.5,
                        NA1=.4,
                        NA2=.5,
                        sigma=.1,
                        Npoly=6,
                        n0=1.,
                        n_integration_steps=100):
    """Run the ``debye_wolf_lattice`` kernel and return the ``ex`` buffer.

    :param shape: ``(Nx, Ny, Nz)``; the output buffers are allocated as
        ``(Nz, Ny, Nx)``
    :param units: ``(dx, dy, dz)``; with ``shape`` it defines the symmetric
        coordinate bounds ``+-d*(N-1)/2`` passed to the kernel
    :param lam: passed to the kernel as ``lam / n0``
    :param NA1, NA2: converted to angles via ``arcsin(NA / n0)``; their mean
        scales the points returned by ``poly_points(Npoly)``
    :param sigma: forwarded to the kernel as float32
    :param Npoly: argument of ``poly_points``
    :param n0: see ``lam`` and ``NA1``/``NA2``
    :param n_integration_steps: compiled into the kernel as ``INT_STEPS``
    :return: host copy of ``ex_g`` (complex64, shape ``(Nz, Ny, Nx)``)

    The elapsed time of kernel launch plus download is printed.
    """

    kxs, kys = .5 * (NA1 + NA2) * poly_points(Npoly)

    p = OCLProgram(absPath("kernels/psf_lattice.cl"),
                   build_options=[
                       "-I",
                       absPath("kernels"), "-D",
                       "INT_STEPS=%s" % n_integration_steps
                   ])

    kxs = np.array(kxs)
    kys = np.array(kys)

    Nx, Ny, Nz = shape
    dx, dy, dz = units

    alpha1 = np.arcsin(NA1 / n0)
    alpha2 = np.arcsin(NA2 / n0)

    u_g = OCLArray.empty((Nz, Ny, Nx), np.float32)
    ex_g = OCLArray.empty((Nz, Ny, Nx), np.complex64)
    ey_g = OCLArray.empty((Nz, Ny, Nx), np.complex64)
    ez_g = OCLArray.empty((Nz, Ny, Nx), np.complex64)

    kxs_g = OCLArray.from_array(kxs.astype(np.float32))
    kys_g = OCLArray.from_array(kys.astype(np.float32))

    t = time.time()

    p.run_kernel(
        "debye_wolf_lattice", (Nx, Ny, Nz), None,
        ex_g.data, ey_g.data, ez_g.data, u_g.data, np.float32(1.),
        np.float32(0.), np.float32(-dx * (Nx - 1) / 2.),
        np.float32(dx * (Nx - 1) / 2.), np.float32(-dy * (Ny - 1) / 2.),
        np.float32(dy * (Ny - 1) / 2.), np.float32(-dz * (Nz - 1) / 2.),
        np.float32(dz * (Nz - 1) / 2.), np.float32(1. * lam / n0),
        np.float32(alpha1), np.float32(alpha2), kxs_g.data, kys_g.data,
        np.int32(len(kxs)), np.float32(sigma))

    ex = ex_g.get()

    # single-argument print(...) gives the same output on Python 2 and 3
    print("time in secs: %s" % (time.time() - t))
    return ex

示例#15

0

显示文件

文件： fftshift.py 项目： simone-codeluppi/gputools

def fftshift(arr_obj, axes = None, res_g = None, return_buffer = False):
    """
    gpu version of fftshift for numpy arrays or OCLArrays

    Parameters
    ----------
    arr_obj: numpy array or OCLArray (float32/complex64)
        the array to be fftshifted
    axes: iterable of int or None
        the axes over which to shift (like np.fft.fftshift)
        if None, all axes are taken
    res_g:
        if given, fills it with the result (has to be same shape and dtype as arr_obj)
        else internally creates a new one
    return_buffer: bool
        selects the return type, see below

    Returns
    -------
        if return_buffer, returns the result as OCLArray
        else returns the result as numpy array

    Raises
    ------
    NotImplementedError
        for an OCLArray whose dtype is not in DTYPE_KERNEL_NAMES, or if one
        of the selected axes has odd length
    ValueError
        if arr_obj is neither an OCLArray nor a numpy array

    """

    if isinstance(arr_obj, OCLArray):
        if arr_obj.dtype.type not in DTYPE_KERNEL_NAMES:
            raise NotImplementedError("only works for float32 or complex64")
    elif isinstance(arr_obj, np.ndarray):
        if np.iscomplexobj(arr_obj):
            arr_obj = OCLArray.from_array(arr_obj.astype(np.complex64,copy = False))
        else:
            arr_obj = OCLArray.from_array(arr_obj.astype(np.float32,copy = False))
    else:
        raise ValueError("unknown type (%s)"%(type(arr_obj)))

    # Resolve the axes only after the type check, so unsupported inputs
    # raise the ValueError above rather than failing on ``.ndim``. A list is
    # built because the axes are walked twice (validation and shifting).
    axes = list(range(arr_obj.ndim)) if axes is None else list(axes)

    if not np.all([arr_obj.shape[a]%2==0 for a in axes]):
        raise NotImplementedError("only works on axes of even dimensions")

    if res_g is None:
        res_g = OCLArray.empty_like(arr_obj)

    if not axes:
        # No axis selected: copy the input so that the result buffer is not
        # returned uninitialised.
        if res_g is not arr_obj:
            res_g.copy_buffer(arr_obj)
    else:
        # iterate over all axes
        # FIXME: this is still rather inefficient
        in_g = arr_obj
        for ax in axes:
            _fftshift_single(in_g, res_g, ax)
            in_g = res_g

    if return_buffer:
        return res_g
    else:
        return res_g.get()

示例#16

0

显示文件

文件： deconv_rl.py 项目： DerThorsten/gputools

def _deconv_rl_np_fft(data, h, Niter = 10, 
                h_is_fftshifted = False):
    """ deconvolves data with given psf (kernel) h

    data and h have to be same shape (ValueError otherwise)

    via lucy richardson deconvolution

    Unless ``h_is_fftshifted`` is set, ``h`` is fftshifted on the host first.
    ``h`` and its flipped copy are uploaded as complex64 and transformed in
    place with ``fft``; each of the ``Niter`` iterations then runs
    convolve / divide / convolve / multiply on the device buffers and prints
    the iteration index.

    Returns ``np.abs`` of the final estimate as a numpy array.
    """

    if data.shape != h.shape:
        raise ValueError("data and h have to be same shape")

    if not h_is_fftshifted:
        h = np.fft.fftshift(h)

    # NOTE(review): only the first two axes are reversed - confirm this is
    # what is wanted for inputs with more than two dimensions.
    hflip = h[::-1,::-1]

    #set up some gpu buffers
    y_g = OCLArray.from_array(data.astype(np.complex64))
    u_g = OCLArray.from_array(data.astype(np.complex64))

    tmp_g = OCLArray.empty(data.shape,np.complex64)

    hf_g = OCLArray.from_array(h.astype(np.complex64))
    hflip_f_g = OCLArray.from_array(hflip.astype(np.complex64))

    # NOTE(review): the plan is created but not passed to any call below;
    # kept in case fft_plan has side effects - confirm.
    plan = fft_plan(data.shape)

    #transform psf
    fft(hf_g,inplace = True)
    fft(hflip_f_g,inplace = True)

    for i in range(Niter):
        # print(i) works on both Python 2 and Python 3
        print(i)
        fft_convolve(u_g, hf_g,
                     res_g = tmp_g,
                     kernel_is_fft = True)

        _complex_divide_inplace(y_g,tmp_g)

        fft_convolve(tmp_g,hflip_f_g,
                     inplace = True,
                     kernel_is_fft = True)

        _complex_multiply_inplace(u_g,tmp_g)

    return np.abs(u_g.get())

示例#17

0

显示文件

文件： gpu_kuwahara.py 项目： adibrov/RunForestGUI

def gpu_kuwahara(data, N=5):
    """Run the ``kuwahara`` OpenCL kernel on ``data``.

    ``N`` must be odd, otherwise ``ValueError`` is raised. ``data`` is
    uploaded as float32, the output buffer has shape
    ``(data.shape[0], data.shape[1])`` and the host copy of that buffer is
    returned.
    """
    n_is_even = (N % 2 == 0)
    if n_is_even:
        raise ValueError("Data has to be a (2n+1)x(2n+1) array.")

    src_g = OCLArray.from_array(data.astype(float32))
    dst_g = OCLArray.empty((data.shape[0], data.shape[1]), float32)

    program = OCLProgram("./OpenCL/gpu_kernels/gpu_kuwahara.cl")

    # global size is the reversed array shape; local size is left to None
    global_size = src_g.shape[::-1]
    program.run_kernel("kuwahara", global_size, None,
                       src_g.data, dst_g.data,
                       int32(N))

    return dst_g.get()

示例#18

0

显示文件

def _deconv_rl_gpu_conv(data_g, h_g, Niter=10):
    """
    using convolve

    Starts from a copy of ``data_g`` and performs ``Niter`` rounds of
    ``convolve`` / ``_divide_inplace`` / ``convolve`` / ``_multiply_inplace``
    on float32 device buffers. Returns the estimate buffer (an OCLArray).
    """
    buf_shape = data_g.shape

    # running estimate, initialised with the data
    estimate_g = OCLArray.empty(buf_shape, np.float32)
    estimate_g.copy_buffer(data_g)

    # two scratch buffers used inside the loop
    scratch_a_g = OCLArray.empty(buf_shape, np.float32)
    scratch_b_g = OCLArray.empty(buf_shape, np.float32)

    # fix this: the kernel is downloaded, reversed along its first two axes
    # on the host and uploaded again
    h_host = h_g.get()
    hflip_g = OCLArray.from_array(h_host[::-1, ::-1].copy())

    for _ in range(Niter):
        convolve(estimate_g, h_g, res_g=scratch_a_g)
        _divide_inplace(data_g, scratch_a_g)
        convolve(scratch_a_g, hflip_g, res_g=scratch_b_g)
        _multiply_inplace(estimate_g, scratch_b_g)

    return estimate_g

示例#19

0

显示文件

文件： test_fft_accur.py 项目： maweigert/gputools

def test_parseval():
    """Compare summed squared magnitudes after GPU and numpy FFTs.

    A random 512x512 complex64 array is transformed in place ``Nz`` times
    with the GPU ``fft``; the sum of ``|x|**2`` of the final buffer is
    stored as the single entry of ``s1`` and the elapsed time is printed.
    On the host, ``Nz`` forward/inverse ``np.fft`` round trips are done and
    the sum is appended to ``s2`` after each one.

    Returns ``(s1, s2)``.
    """

    from time import time
    Nx = 512
    Nz  = 10
    d = np.random.uniform(-1,1,(Nx,Nx)).astype(np.complex64)
    d_g = OCLArray.from_array(d.astype(np.complex64))

    s1, s2 = [],[]
    t = time()
    for i in range(Nz):
        # print(...) with one argument works on Python 2 and Python 3
        print(i)
        fft(d_g, inplace=True)

    s1.append(np.sum(np.abs(d_g.get())**2))

    print(time()-t)

    for i in range(Nz):
        print(i)
        d = np.fft.fftn(d).astype(np.complex64)
        d = np.fft.ifftn(d).astype(np.complex64)
        s2.append(np.sum(np.abs(d)**2))

    return s1, s2

示例#20

0

显示文件

文件： test_accur.py 项目： simone-codeluppi/gputools

def test_parseval():
    """Collect energy sums after repeated GPU FFTs and numpy FFT round trips.

    Returns ``(s1, s2)``: ``s1`` holds one value, the sum of ``|x|**2`` of
    the device buffer after ten in-place GPU ``fft`` calls; ``s2`` holds ten
    values, one after each host ``fftn``/``ifftn`` round trip. The GPU
    timing is printed in between.
    """
    from time import time

    side = 512
    n_rounds = 10

    d = np.random.uniform(-1, 1, (side, side)).astype(np.complex64)
    d_g = OCLArray.from_array(d.astype(np.complex64))

    gpu_sums = []
    cpu_sums = []

    started = time()
    for _ in range(n_rounds):
        fft(d_g, inplace=True)

    gpu_energy = np.sum(np.abs(d_g.get())**2)
    gpu_sums.append(gpu_energy)

    print(time() - started)

    for _ in range(n_rounds):
        spectrum = np.fft.fftn(d).astype(np.complex64)
        d = np.fft.ifftn(spectrum).astype(np.complex64)
        cpu_sums.append(np.sum(np.abs(d)**2))

    return gpu_sums, cpu_sums

示例#21

0

显示文件

    def time_multi(N, nargs, niter=100):
        """Time one ``OCLReductionKernel2`` reducing ``nargs`` inputs at once.

        Builds the map expressions ``"<i>*x<i>[i]"`` for ``i`` in
        ``range(nargs)``, feeds ``nargs`` arrays of ``N`` float32 ones and
        calls the kernel ``niter`` times. The results and the mean time per
        call (ms) are printed.

        :return: mean time per kernel call in seconds
        """
        # range instead of xrange so the code runs on Python 2 and Python 3
        map_exprs = ["%s*x%s[i]" % (i, i) for i in range(nargs)]
        arguments = ",".join("__global float *x%s" % i for i in range(nargs))

        k = OCLReductionKernel2(np.float32,
                                neutral="0",
                                reduce_expr="a+b",
                                map_exprs=map_exprs,
                                arguments=arguments)

        ins = [OCLArray.from_array(np.ones(N, np.float32)) for _ in map_exprs]
        outs = [OCLArray.empty(1, np.float32) for _ in map_exprs]

        from time import time
        t = time()
        for _ in range(niter):
            k(*ins, outs=outs)
        # wait for the queue before reading the clock
        get_device().queue.finish()
        t = (time() - t) / niter
        # single-argument print(...) gives the same output on Python 2 and 3
        print("multi reduction: result = %s" %
              ([float(out.get()) for out in outs],))
        print("multi reduction:\t\t%.2f ms" % (1000 * t))
        return t

示例#22

0

显示文件

    def time_simple(N, nargs, niter=100):
        """Time ``nargs`` separate ``OCLReductionKernel`` instances.

        One kernel is built per map expression ``"<i>*x[i]"`` for ``i`` in
        ``range(nargs)``; each gets its own array of ``N`` float32 ones and a
        one-element output buffer. All kernels are called ``niter`` times;
        the results and the mean time per round (ms) are printed.

        :return: mean time per round in seconds
        """
        from gputools import OCLReductionKernel

        # range instead of xrange so the code runs on Python 2 and Python 3
        map_exprs = ["%s*x[i]" % i for i in range(nargs)]

        ks = [
            OCLReductionKernel(np.float32,
                               neutral="0",
                               reduce_expr="a+b",
                               map_expr=expr,
                               arguments="__global float *x")
            for expr in map_exprs
        ]

        ins = [OCLArray.from_array(np.ones(N, np.float32)) for _ in map_exprs]
        outs = [OCLArray.empty(1, np.float32) for _ in map_exprs]

        from time import time
        t = time()
        for _ in range(niter):
            for k, inn, out in zip(ks, ins, outs):
                k(inn, out=out)
        # wait for the queue before reading the clock
        get_device().queue.finish()
        t = (time() - t) / niter
        # single-argument print(...) gives the same output on Python 2 and 3
        print("simple reduction: result = %s" %
              ([float(out.get()) for out in outs],))
        print("simple reduction:\t\t%.2f ms" % (1000 * t))
        return t

示例#23

0

显示文件

文件： deconv_rl.py 项目： DerThorsten/gputools

def _deconv_rl_gpu_conv(data_g, h_g, Niter = 10):
    """ 
    using convolve

    Iterates ``Niter`` times over convolve -> _divide_inplace -> convolve ->
    _multiply_inplace, starting from a copy of ``data_g``. All working
    buffers are float32 OCLArrays with the shape of ``data_g``; the estimate
    buffer is returned without downloading it.
    """

    def _new_buffer():
        return OCLArray.empty(data_g.shape,np.float32)

    #set up some gpu buffers
    u_g = _new_buffer()
    u_g.copy_buffer(data_g)

    first_g = _new_buffer()
    second_g = _new_buffer()

    #fix this
    flipped_host = (h_g.get()[::-1,::-1]).copy()
    hflip_g = OCLArray.from_array(flipped_host)

    step = 0
    while step < Niter:
        convolve(u_g, h_g, res_g = first_g)
        _divide_inplace(data_g, first_g)
        convolve(first_g, hflip_g, res_g = second_g)
        _multiply_inplace(u_g, second_g)
        step += 1

    return u_g

示例#24

0

显示文件

文件： bpm3d.py 项目： maweigert/biobeam

 def _transfer_dn(self, dn):
     """Upload ``dn`` after passing it through ``_copy_arr_with_correct_type``.

     When ``self._is_subsampled`` is true the result is stored as an
     ``OCLImage`` in ``self._im_dn``; otherwise as an ``OCLArray`` in
     ``self._buf_dn``.
     """
     if not self._is_subsampled:
         host_copy = self._copy_arr_with_correct_type(dn)
         self._buf_dn = OCLArray.from_array(host_copy)
         return

     host_copy = self._copy_arr_with_correct_type(dn)
     self._im_dn = OCLImage.from_array(host_copy)

示例#25

0

显示文件

文件： push.py 项目： VolkerH/pyclesperanto_prototype

def push(any_array):
    '''
    converts a numpy array to an OpenCL array

    This method does the same as the converters in CLIJ but is less flexible
    https://github.com/clij/clij-core/tree/master/src/main/java/net/haesleinhuepf/clij/converters/implementations

    An OCLArray input is returned unchanged. Anything else is cast to
    float32 and its axes are swapped before upload: axes 0 and 1 for 2-D
    input, axes 0 and 2 for input with three or more dimensions. Input with
    fewer than two dimensions has no axes to swap and is uploaded as is
    (previously this raised inside np.swapaxes).

    :param any_array: input numpy array
    :return: opencl-array
    '''

    if isinstance(any_array, OCLArray):
        return any_array

    temp = any_array.astype(np.float32)

    if temp.ndim == 2:
        temp = np.swapaxes(temp, 0, 1)
    elif temp.ndim >= 3:
        temp = np.swapaxes(temp, 0, 2)
    # ndim < 2: nothing to swap

    return OCLArray.from_array(temp)

示例#26

0

显示文件

def create_dn_buffer(size,
                     units,
                     points,
                     dn_inner=.0,
                     rad_inner=0,
                     dn_outer=.1,
                     rad_outer=.4):
    """Fill a float32 ``(Nz, Ny, Nx)`` device buffer with the ``fill_dn`` kernel.

    ``points`` is converted to an array, its rows are sorted by column 2 and
    the flattened float32 copy is uploaded. The kernel additionally receives
    the number of points, the three entries of ``units`` and the four
    ``dn_*`` / ``rad_*`` values as scalars.

    Returns the filled OCLArray.
    """
    Nx, Ny, Nz = size
    dx, dy, dz = units

    program = OCLProgram(absPath("kernels/bpm_3d_spheres.cl"))

    dn_g = OCLArray.empty((Nz, Ny, Nx), dtype=np.float32)

    # sort by z (third column)
    pts = np.array(points)
    z_order = np.argsort(pts[:, 2])
    pts = pts[z_order, :]

    n_points = pts.shape[0]
    points_g = OCLArray.from_array(pts.flatten().astype(np.float32))

    scalar_args = [
        np.float32(value)
        for value in (dx, dy, dz, dn_inner, rad_inner, dn_outer, rad_outer)
    ]
    program.run_kernel("fill_dn", (Nx, Ny, Nz), None, dn_g.data,
                       points_g.data, np.int32(n_points), *scalar_args)

    return dn_g

示例#27

0

显示文件

文件： bpm_3d_spheres.py 项目： maweigert/bpm

def create_dn_buffer(size, units,points,
                     dn_inner = .0, rad_inner = 0,
                     dn_outer = .1, rad_outer = .4):
    """Run the ``fill_dn`` kernel over an ``(Nx, Ny, Nz)`` grid.

    The output buffer is float32 with shape ``(Nz, Ny, Nx)``. The point list
    is sorted by its third column, flattened, cast to float32 and uploaded;
    voxel sizes and the inner/outer ``dn`` and radius values go to the
    kernel as float32 scalars. The output buffer is returned.
    """
    Nx, Ny, Nz = size
    dx, dy, dz = units

    program = OCLProgram(absPath("kernels/bpm_3d_spheres.cl"))

    dn_g = OCLArray.empty((Nz,Ny,Nx),dtype=np.float32)

    # sort by z
    ps = np.array(points)
    ps = ps[np.argsort(ps[:,2]),:]
    flat_points = ps.flatten().astype(np.float32)

    pointsBuf = OCLArray.from_array(flat_points)

    kernel_args = (
        dn_g.data,
        pointsBuf.data,
        np.int32(ps.shape[0]),
        np.float32(dx), np.float32(dy), np.float32(dz),
        np.float32(dn_inner), np.float32(rad_inner),
        np.float32(dn_outer), np.float32(rad_outer),
    )
    program.run_kernel("fill_dn", (Nx,Ny,Nz), None, *kernel_args)

    return dn_g

示例#28

0

显示文件

def _gaussian_buf(d_g,
                  sigma=(4., 4.),
                  res_g=None,
                  normalize=True,
                  truncate=4.0):
    radius = tuple(int(truncate * s + 0.5) for s in sigma)

    ns = tuple(np.arange(-r, r + 1) for r in radius)

    hs = tuple(
        np.exp(-.5 / s**2 * n**2)
        for s, n in zip(reversed(sigma), reversed(ns)))

    if normalize:
        hs = tuple(1. * h / np.sum(h) for h in hs)

    h_gs = tuple(OCLArray.from_array(h.astype(np.float32)) for h in hs)

    if len(d_g.shape) == 1:
        return convolve(d_g, *h_gs, res_g=res_g)
    elif len(d_g.shape) == 2:
        return convolve_sep2(d_g, *h_gs, res_g=res_g)
    elif len(d_g.shape) == 3:
        return convolve_sep3(d_g, *h_gs, res_g=res_g)
    else:
        raise NotImplentedError("only 1D, 2D, or 3D images supported yet")

示例#29

0

显示文件

def _deconv_rl_np_fft(data, h, Niter=10, h_is_fftshifted=False):
    """ deconvolves data with given psf (kernel) h

    data and h have to be same shape, otherwise ValueError is raised

    via lucy richardson deconvolution: the kernel (fftshifted on the host
    unless ``h_is_fftshifted``) and its copy reversed along the first two
    axes are uploaded as complex64 and transformed in place; ``Niter``
    iterations of convolve / divide / convolve / multiply follow, each one
    logged. Returns ``np.abs`` of the final estimate.
    """
    if not data.shape == h.shape:
        raise ValueError("data and h have to be same shape")

    kernel = h if h_is_fftshifted else np.fft.fftshift(h)
    kernel_flipped = kernel[::-1, ::-1]

    #set up some gpu buffers
    data_c64 = data.astype(np.complex64)
    y_g = OCLArray.from_array(data_c64)
    u_g = OCLArray.from_array(data.astype(np.complex64))

    tmp_g = OCLArray.empty(data.shape, np.complex64)

    hf_g = OCLArray.from_array(kernel.astype(np.complex64))
    hflip_f_g = OCLArray.from_array(kernel_flipped.astype(np.complex64))

    # NOTE(review): created but not passed on to any call below - confirm
    plan = fft_plan(data.shape)

    #transform psf
    for psf_buf in (hf_g, hflip_f_g):
        fft(psf_buf, inplace=True)

    iteration = 0
    while iteration < Niter:
        logger.info("Iteration: {}".format(iteration))
        fft_convolve(u_g, hf_g, res_g=tmp_g, kernel_is_fft=True)
        _complex_divide_inplace(y_g, tmp_g)
        fft_convolve(tmp_g, hflip_f_g, inplace=True, kernel_is_fft=True)
        _complex_multiply_inplace(u_g, tmp_g)
        iteration += 1

    return np.abs(u_g.get())

示例#30

0

显示文件

文件： convolve_sep.py 项目： maweigert/gputools

def test_2d():
    """Call ``convolve_sep2`` once with numpy inputs and once with OCLArrays.

    The input is a 100x100 float32 image with a single non-zero pixel; the
    kernels are box filters of length 5 and 13. Returns
    ``(numpy_result, device_result.get())``.
    """
    import time

    image = np.zeros((100, 100), np.float32)
    image[50,50] = 1.

    hx = 1./5*np.ones(5)
    hy = 1./13*np.ones(13)

    out = convolve_sep2(image, hx, hy)

    image_g, hx_g, hy_g = [
        OCLArray.from_array(arr.astype(np.float32))
        for arr in (image, hx, hy)
    ]
    out_g = convolve_sep2(image_g, hx_g, hy_g)

    return out, out_g.get()

示例#31

0

显示文件

文件： oclfft.py 项目： simone-codeluppi/gputools

def _ocl_fft_numpy(plan, arr, inverse=False, fast_math=True):
    """FFT of a numpy array through ``_ocl_fft_gpu_inplace``.

    ``arr`` is uploaded as complex64 (a log message is emitted when that
    requires a conversion), transformed on the device with ``plan`` and
    ``inverse``, and returned as a numpy array. ``fast_math`` is accepted
    but not used in this function.
    """
    already_c64 = (arr.dtype == np.complex64)
    if not already_c64:
        logger.info("converting %s to complex64, might slow things down..." % arr.dtype)

    device_buf = OCLArray.from_array(arr.astype(np.complex64, copy=False))
    _ocl_fft_gpu_inplace(plan, device_buf, inverse=inverse)
    return device_buf.get()

示例#32

0

显示文件

文件： _focus_fields_debye.py 项目： maweigert/bpm

def test_bessel(n,x):
    """Run the ``bessel_fill`` kernel on ``x`` with integer argument ``n``.

    ``x`` is uploaded as float32; the output buffer is created with
    ``OCLArray.empty_like`` from the float32 host array. The kernel is
    launched with the input's shape as global size and the output is
    returned as a numpy array.
    """
    x_f32 = x.astype(float32)
    in_g = OCLArray.from_array(x_f32)
    out_g = OCLArray.empty_like(x.astype(float32))

    program = OCLProgram(absPath("kernels/bessel.cl"))
    program.run_kernel("bessel_fill", in_g.shape, None,
                       in_g.data, out_g.data, int32(n))

    return out_g.get()

示例#33

0

显示文件

def test_2d():
    """Compare the numpy path and the OCLArray path of ``convolve_sep2``.

    Returns a pair: the result for host inputs and the downloaded result
    for device inputs, both computed from the same single-pixel 100x100
    image and the same two box kernels.
    """
    import time

    def _to_device(arr):
        return OCLArray.from_array(arr.astype(np.float32))

    data = np.zeros((100, ) * 2, np.float32)
    data[50, 50] = 1.

    hx = 1. / 5 * np.ones(5)
    hy = 1. / 13 * np.ones(13)

    host_result = convolve_sep2(data, hx, hy)

    data_g = _to_device(data)
    hx_g = _to_device(hx)
    hy_g = _to_device(hy)
    device_result = convolve_sep2(data_g, hx_g, hy_g)

    return host_result, device_result.get()

示例#34

0

显示文件

def test_bessel(n, x):
    """Evaluate the ``bessel_fill`` kernel for order argument ``n`` on ``x``.

    Input and output are float32 device buffers shaped like ``x``; the
    output is downloaded and returned.
    """
    source_g = OCLArray.from_array(x.astype(float32))
    target_g = OCLArray.empty_like(x.astype(float32))

    kernel_args = (source_g.data, target_g.data, int32(n))

    prog = OCLProgram(absPath("kernels/bessel.cl"))
    prog.run_kernel("bessel_fill", source_g.shape, None, *kernel_args)

    return target_g.get()

示例#35

0

显示文件

文件： _focus_fields_debye.py 项目： maweigert/bpm

def focus_field_debye_at(x,y,z,lam, NA, n0 = 1., n_integration_steps = 200):
    """ the same as focus_field_debye but for the coordinates given in x, y, z (arrays of same shape)

        slower than focus_field_debye as it doesnt assume the coordinates to be on a grid

        :param x, y, z: coordinate arrays of identical shape (asserted)
        :param lam: passed to the kernel as ``lam / n0``
        :param NA: scalar or sequence; a scalar ``NA`` is expanded to
            ``[0., NA]``. Converted to angles with ``arcsin(NA / n0)``; the
            number of angles must be even (asserted)
        :param n0: see ``lam`` and ``NA``
        :param n_integration_steps: compiled into the kernel as ``INT_STEPS``
        :return: ``(u, ex, ey, ez)`` reshaped to the shape of ``x``; ``u`` is
            float32, the others complex64
    """

    # print(...) with one argument works on Python 2 and Python 3
    print(absPath("kernels/psf_debye.cl"))
    p = OCLProgram(absPath("kernels/psf_debye.cl"),
                   build_options = str("-I %s -D INT_STEPS=%s"%(absPath("."),n_integration_steps)))

    if np.isscalar(NA):
        NA = [0.,NA]

    alphas = np.arcsin(np.array(NA)/n0)
    assert len(alphas)%2 ==0

    assert x.shape == y.shape == z.shape
    dshape =x.shape
    N = np.prod(dshape)

    # coordinates are flattened for the 1D kernel launch
    x_g = OCLArray.from_array(x.flatten().astype(np.float32))
    y_g = OCLArray.from_array(y.flatten().astype(np.float32))
    z_g = OCLArray.from_array(z.flatten().astype(np.float32))

    u_g = OCLArray.empty(N,np.float32)
    ex_g = OCLArray.empty(N,np.complex64)
    ey_g = OCLArray.empty(N,np.complex64)
    ez_g = OCLArray.empty(N,np.complex64)

    alpha_g = OCLArray.from_array(alphas.astype(np.float32))

    p.run_kernel("debye_wolf_at",(N,),None,
                 x_g.data,y_g.data,z_g.data,
                 ex_g.data,ey_g.data,ez_g.data, u_g.data,
                 np.float32(1.),np.float32(0.),
                 np.float32(lam/n0),
                 alpha_g.data, np.int32(len(alphas)))

    u = u_g.get().reshape(dshape)
    ex = ex_g.get().reshape(dshape)
    ey = ey_g.get().reshape(dshape)
    ez = ez_g.get().reshape(dshape)

    return u, ex, ey, ez

示例#36

0

显示文件

def focus_field_debye_at(x, y, z, lam, NA, n0=1., n_integration_steps=200):
    """ the same as focus_field_debye but for the coordinates given in x, y, z (arrays of same shape)

        slower than focus_field_debye as it doesnt assume the coordinates to be on a grid

        A scalar ``NA`` is expanded to ``[0., NA]``; the values are turned
        into angles via ``arcsin(NA / n0)`` and their count must be even.
        ``n_integration_steps`` is compiled into the kernel as ``INT_STEPS``.

        Returns ``(u, ex, ey, ez)``, each reshaped to the shape of ``x``.
    """

    # print(...) with one argument works on Python 2 and Python 3
    print(absPath("kernels/psf_debye.cl"))
    p = OCLProgram(absPath("kernels/psf_debye.cl"),
                   build_options=str("-I %s -D INT_STEPS=%s" %
                                     (absPath("."), n_integration_steps)))

    if np.isscalar(NA):
        NA = [0., NA]

    alphas = np.arcsin(np.array(NA) / n0)
    assert len(alphas) % 2 == 0

    assert x.shape == y.shape == z.shape
    dshape = x.shape
    N = np.prod(dshape)

    # coordinates are flattened for the 1D kernel launch
    x_g = OCLArray.from_array(x.flatten().astype(np.float32))
    y_g = OCLArray.from_array(y.flatten().astype(np.float32))
    z_g = OCLArray.from_array(z.flatten().astype(np.float32))

    u_g = OCLArray.empty(N, np.float32)
    ex_g = OCLArray.empty(N, np.complex64)
    ey_g = OCLArray.empty(N, np.complex64)
    ez_g = OCLArray.empty(N, np.complex64)

    alpha_g = OCLArray.from_array(alphas.astype(np.float32))

    p.run_kernel("debye_wolf_at", (N, ), None, x_g.data, y_g.data, z_g.data,
                 ex_g.data, ey_g.data, ez_g.data, u_g.data, np.float32(1.),
                 np.float32(0.), np.float32(lam / n0), alpha_g.data,
                 np.int32(len(alphas)))

    u = u_g.get().reshape(dshape)
    ex = ex_g.get().reshape(dshape)
    ey = ey_g.get().reshape(dshape)
    ez = ez_g.get().reshape(dshape)

    return u, ex, ey, ez

示例#37

0

显示文件

文件： oclfft.py 项目： maweigert/gputools

def _ocl_fft_numpy(plan, arr,inverse = False, batch = 1, fast_math = True):
    """Transform a numpy array on the device and return the numpy result.

    ``arr`` is uploaded as complex64; if it has a different dtype a log
    message points out the conversion. ``plan``, ``inverse`` and ``batch``
    go to ``_ocl_fft_gpu_inplace``. ``fast_math`` is not used here.
    """
    if not arr.dtype == np.complex64:
       logger.info("converting %s to complex64, might slow things down..."%arr.dtype)

    arr_c64 = arr.astype(np.complex64,copy=False)
    work_g = OCLArray.from_array(arr_c64)

    _ocl_fft_gpu_inplace(plan, work_g, inverse = inverse, batch = batch)

    return work_g.get()

示例#38

0

显示文件

文件： oclfft.py 项目： robintw/gputools

def _ocl_fft_numpy(arr,inverse = False, plan = None):
    """FFT of a numpy array on the device.

    When ``plan`` is None a ``Plan`` for ``arr.shape`` is created on the
    current device queue. ``arr`` is uploaded as complex64 (with a log
    message if that needs a conversion), transformed in place by
    ``_ocl_fft_gpu_inplace`` and downloaded again.
    """
    fft_plan_obj = plan
    if fft_plan_obj is None:
        fft_plan_obj = Plan(arr.shape, queue = get_device().queue)

    needs_conversion = arr.dtype != np.complex64
    if needs_conversion:
       logger.info("converting %s to complex64, might slow things down..."%arr.dtype)

    buf_g = OCLArray.from_array(arr.astype(np.complex64,copy=False))
    _ocl_fft_gpu_inplace(buf_g, inverse = inverse, plan  = fft_plan_obj)

    return buf_g.get()

示例#39

0

显示文件

    def _setup_impl(self):
        """setting up the gpu buffers and kernels

        Creates the OpenCL program, an FFT plan for shape ``(Ny, Nx)``, the
        complex64 device copy of ``self._H``, optionally the device copy of
        ``self.dn`` (only when it is set and ``self.n_volumes == 1``), the
        float32 device copies of the two weight arrays and two zero-filled
        float32 buffers of length ``Nz``.
        """
        self.bpm_program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

        Nx, Ny, Nz = self.size

        self._plan = fft_plan((Ny, Nx))
        self._H_g = OCLArray.from_array(self._H.astype(np.complex64))

        if self.dn is not None and self.n_volumes == 1:
            self.dn_g = OCLArray.from_array(self.dn)

        # float32 device copies: <name> -> <name>_g
        for attr in ("scatter_weights", "gfactor_weights"):
            host_values = getattr(self, attr).astype(np.float32)
            setattr(self, attr + "_g", OCLArray.from_array(host_values))

        # zero-initialised buffers of length Nz
        for attr in ("scatter_cross_sec_g", "gfactor_g"):
            setattr(self, attr, OCLArray.zeros(Nz, "float32"))

示例#40

0

显示文件

文件： oclfft.py 项目： robintw/gputools

def _ocl_fft_numpy(arr, inverse=False, plan=None):
    """Transform a numpy array via an OCLArray and hand back the fetched data.

    :param arr: input numpy array; anything that is not complex64 is cast
    :param inverse: passed through to ``_ocl_fft_gpu_inplace``
    :param plan: optional FFT plan; a ``Plan`` matching ``arr.shape`` is
                 created on the ``get_device()`` queue if omitted
    :return: result of ``.get()`` on the transformed OCLArray
    """
    if plan is None:
        device_queue = get_device().queue
        plan = Plan(arr.shape, queue=device_queue)

    input_dtype = arr.dtype
    if input_dtype != np.complex64:
        logger.info("converting %s to complex64, might slow things down..." %
                    input_dtype)

    # astype(copy=False) hands back arr itself when no cast is needed
    complex_view = arr.astype(np.complex64, copy=False)
    buf_g = OCLArray.from_array(complex_view)

    _ocl_fft_gpu_inplace(buf_g, inverse=inverse, plan=plan)

    return buf_g.get()

示例#41

0

显示文件

文件： _bpm3d_ocl.py 项目： maweigert/bpm

    def _setup_impl(self):
        """Set up the kernel program, the FFT plan and all GPU buffers.

        Expects ``self.size`` (unpacked as Nx, Ny, Nz), ``self._H``,
        ``self.dn``, ``self.n_volumes``, ``self.scatter_weights`` and
        ``self.gfactor_weights`` to be present already.
        """
        kernel_file = absPath("kernels/bpm_3d_kernels.cl")
        self.bpm_program = OCLProgram(kernel_file)

        Nx, Ny, Nz = self.size
        plane_shape = (Ny, Nx)
        self._plan = fft_plan(plane_shape)

        to_device = OCLArray.from_array

        self._H_g = to_device(self._H.astype(np.complex64))

        # only the single-volume case gets its own dn buffer
        single_volume = self.n_volumes == 1
        if self.dn is not None and single_volume:
            self.dn_g = to_device(self.dn)

        self.scatter_weights_g = to_device(
            self.scatter_weights.astype(np.float32))
        self.gfactor_weights_g = to_device(
            self.gfactor_weights.astype(np.float32))

        # per-z accumulators, zero-initialised
        self.scatter_cross_sec_g = OCLArray.zeros(Nz, "float32")
        self.gfactor_g = OCLArray.zeros(Nz, "float32")

示例#42

0

显示文件

    def __init__(self,
                 psf: np.ndarray,
                 psf_is_fftshifted: bool = False,
                 n_iter=10):
        """Prepare the buffers and plan for deconvolving with a fixed PSF.

        :param psf: point spread function; its shape is stored as
                    ``self.shape``
        :param psf_is_fftshifted: when False, ``np.fft.fftshift`` is applied
                                  to ``psf`` first
        :param n_iter: iteration count stored as ``self.n_iter``
        """
        self.shape = psf.shape

        shifted_psf = psf if psf_is_fftshifted else np.fft.fftshift(psf)

        self.n_iter = n_iter

        # Reverses the first two axes only; a third axis, if present, is left
        # in its original order.
        # NOTE(review): confirm whether 3D PSFs are meant to be supported.
        mirrored_psf = shifted_psf[::-1, ::-1]

        self.psf_g = OCLArray.from_array(shifted_psf.astype(np.complex64))
        self.psfflip_f_g = OCLArray.from_array(
            mirrored_psf.astype(np.complex64))
        self.plan = fft_plan(self.shape)

        # both kernels are replaced by their transforms, in place
        for kernel_g in (self.psf_g, self.psfflip_f_g):
            fft(kernel_g, inplace=True)

        # scratch buffer with the same shape as the PSF
        self.tmp_g = OCLArray.empty(shifted_psf.shape, np.complex64)

示例#43

0

显示文件

文件： bpm_class.py 项目： Wenhan-Zhang-327/bpm

    def setup(self, size, units, lam = .5, n0 = 1.,
              use_fresnel_approx = False):
        """
            sets up the internal variables e.g. propagators etc...

            :param size:  the size of the geometry in pixels (Nx,Ny,Nz)
            :param units: the physical units of each voxel in microns (dx,dy,dz)
            :param lam: the wavelength of light in microns
            :param n0:  the refractive index of the surrounding media
            :param use_fresnel_approx:  if True, uses fresnel approximation for propagator

            After the base class setup has run, this creates the kernel
            program, the FFT plan (``self._plan``), the GPU copies of the
            propagator and weight arrays, two zeroed per-z buffers and the
            reduction kernel.
        """
        Bpm3d_Base.setup(self, size, units, lam=lam, n0=n0,
                         use_fresnel_approx=use_fresnel_approx)

        # setting up the gpu buffers and kernels
        self.program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

        # Nz was previously never bound, which made the zeros() calls below
        # fail with a NameError; take all three extents from self.size.
        Nx, Ny, Nz = self.size

        # plan for one (Ny, Nx) slice; previously an empty shape was passed
        # and the resulting plan was thrown away
        self._plan = fft_plan((Ny, Nx))

        self._H_g = OCLArray.from_array(self._H.astype(np.complex64))

        self.scatter_weights_g = OCLArray.from_array(self.scatter_weights.astype(np.float32))
        self.gfactor_weights_g = OCLArray.from_array(self.gfactor_weights.astype(np.float32))

        # zero-initialised float32 buffers with one entry per z index
        self.scatter_cross_sec_g = OCLArray.zeros(Nz, "float32")
        self.gfactor_g = OCLArray.zeros(Nz, "float32")

        # weighted sum of |field[i] - plain|^2, where "plain" is subtracted
        # from element 0 only (the (i==0) factor)
        self.reduce_kernel = OCLReductionKernel(
            np.float32, neutral="0",
            reduce_expr="a+b",
            map_expr="weights[i]*cfloat_abs(field[i]-(i==0)*plain)*cfloat_abs(field[i]-(i==0)*plain)",
            arguments="__global cfloat_t *field, __global float * weights,cfloat_t plain")

示例#44

0

显示文件

文件： test_resample.py 项目： simone-codeluppi/gputools

def transfer(data):
    """Round-trip ``data`` through an OCLArray -> OCLImage -> OCLArray copy.

    Only float32 and complex64 inputs get an image created for them;
    complex64 uses a two-channel float32 image.

    NOTE(review): ``data.shape[::1]`` is a no-op slice; if the image is meant
    to take the reversed shape, ``[::-1]`` may have been intended - confirm.
    """
    src_g = OCLArray.from_array(data)
    dst_g = OCLArray.empty_like(data)

    scalar_type = data.dtype.type
    if scalar_type == np.float32:
        image = OCLImage.empty(data.shape[::1], dtype=np.float32)
    elif scalar_type == np.complex64:
        image = OCLImage.empty(data.shape[::1], dtype=np.float32, num_channels=2)

    # buffer -> image -> second buffer
    image.copy_buffer(src_g)
    dst_g.copy_image(image)

    return dst_g.get()

示例#45

0

显示文件

文件： test_resample.py 项目： maweigert/gputools

def resample_buf(data, new_shape):
    """Copy ``data`` into an OCLImage and resample it into a new buffer.

    :param data: numpy array of dtype float32 or complex64 (complex64 is
                 stored as a two-channel float32 image)
    :param new_shape: shape of the output buffer
    :return: ``.get()`` of the output buffer, which has dtype ``data.dtype``
    """
    source_g = OCLArray.from_array(data)
    target_g = OCLArray.empty(new_shape, data.dtype)

    # pick the image layout from the scalar type of the input
    elem_type = data.dtype.type
    if elem_type == np.float32:
        image_kwargs = dict(dtype=np.float32)
    elif elem_type == np.complex64:
        image_kwargs = dict(dtype=np.float32, num_channels=2)

    im = OCLImage.empty(data.shape[::1], **image_kwargs)

    im.copy_buffer(source_g)
    target_g.copy_image_resampled(im)

    return target_g.get()

示例#46

0

显示文件

文件： test_resample.py 项目： maweigert/gputools

def transfer(data):
    """Copy ``data`` buffer -> image -> buffer and return the final buffer.

    An image is only created for float32 and complex64 inputs; complex64 is
    stored in a float32 image with two channels.
    """
    first_g = OCLArray.from_array(data)
    second_g = OCLArray.empty_like(data)

    # choose the image layout from the element type
    elem_type = data.dtype.type
    if elem_type == np.float32:
        image_kwargs = dict(dtype=np.float32)
    elif elem_type == np.complex64:
        image_kwargs = dict(dtype=np.float32, num_channels=2)

    im = OCLImage.empty(data.shape[::1], **image_kwargs)

    im.copy_buffer(first_g)
    second_g.copy_image(im)

    return second_g.get()

示例#47

0

显示文件

文件： test_resample.py 项目： simone-codeluppi/gputools

def resample_buf(data, new_shape):
    """Resample ``data`` into a buffer of ``new_shape`` via an OCLImage.

    :param data: float32 or complex64 numpy array; complex64 goes into a
                 two-channel float32 image
    :param new_shape: shape of the buffer the image is resampled into
    :return: ``.get()`` of that buffer
    """
    in_g = OCLArray.from_array(data)
    out_g = OCLArray.empty(new_shape, data.dtype)

    scalar_type = data.dtype.type
    if scalar_type == np.float32:
        image = OCLImage.empty(data.shape[::1], dtype=np.float32)
    elif scalar_type == np.complex64:
        image = OCLImage.empty(data.shape[::1], dtype=np.float32, num_channels=2)

    # stage the input in the image, then resample into the output buffer
    image.copy_buffer(in_g)
    out_g.copy_image_resampled(image)

    return out_g.get()

示例#48

0

显示文件

文件： blur.py 项目： maweigert/gputools

def _blur_buf(d_g, width=(4.0, 4.0), res_g=None):
    """Blur an OCLArray with separable Gaussian kernels built from ``width``.

    :param d_g: input buffer; ``len(d_g.shape)`` selects the 1D, 2D or 3D
                convolution routine
    :param width: one blur width per axis; each entry ``s`` gives a kernel
                  with ``int(3 * s + 1)`` taps and sigma ``0.5 * s``
    :param res_g: optional result buffer forwarded to the convolution call
    :return: the value returned by the convolution routine, or None when
             ``d_g`` has a dimensionality other than 1, 2 or 3
    """
    # np.linspace needs an integer sample count; with float widths such as
    # the default, 3 * s + 1 is a float and is rejected by current NumPy.
    Ns = [int(3 * s + 1) for s in width]
    sigmas = [0.5 * s for s in width]

    # explicit float division keeps the half-width at N/2 (e.g. 6.5 for
    # N = 13) now that N is an int, also under Python 2
    hs = [np.exp(-0.5 / s ** 2 * np.linspace(-N / 2.0, N / 2.0, N) ** 2)
          for s, N in zip(sigmas, Ns)]

    # kernels are passed in the reverse of the order given in `width`
    h_gs = [OCLArray.from_array(h.astype(np.float32)) for h in hs][::-1]

    ndim = len(d_g.shape)
    if ndim == 1:
        return convolve(d_g, *h_gs, res_g=res_g)
    elif ndim == 2:
        return convolve_sep2(d_g, *h_gs, res_g=res_g)
    elif ndim == 3:
        return convolve_sep3(d_g, *h_gs, res_g=res_g)

    # unsupported dimensionality: kept as a silent None for compatibility
    return None

示例#49

0

显示文件

文件： deconv_rl.py 项目： DerThorsten/gputools

def _deconv_rl_gpu_fft(data_g, h_g, Niter = 10):
    """Iterative deconvolution on GPU buffers using fft_convolve.

    :param data_g: input buffer; must have the same shape as ``h_g``
    :param h_g: kernel buffer; it is replaced by its transform in place
                (``fft(h_g, inplace=True)``), so the caller's buffer changes
    :param Niter: number of iterations
    :return: the complex64 buffer ``u_g`` holding the current estimate
    :raises ValueError: if ``data_g`` and ``h_g`` differ in shape

    NOTE(review): ``_complex_divide_inplace(data_g, tmp_g)`` modifies one of
    its arguments in place; which one is not visible here - confirm whether
    ``data_g`` is altered for the caller.
    """

    if data_g.shape != h_g.shape:
        raise ValueError("data and h have to be same shape")

    # set up some gpu buffers
    u_g = OCLArray.empty(data_g.shape, np.complex64)

    # start the estimate from the data itself
    u_g.copy_buffer(data_g)

    tmp_g = OCLArray.empty(data_g.shape, np.complex64)

    # fix this: the flipped kernel takes a detour through host memory;
    # only the first two axes are reversed
    hflip_g = OCLArray.from_array((h_g.get()[::-1, ::-1]).copy())

    # NOTE(review): the plan is created but never handed to fft/fft_convolve
    plan = fft_plan(data_g.shape)

    # transform psf
    fft(h_g, inplace=True)
    fft(hflip_g, inplace=True)

    for i in range(Niter):
        # print(i) gives the same output on Python 2 and 3; the former
        # `print i` statement is a SyntaxError on Python 3
        print(i)
        fft_convolve(u_g, h_g,
                     res_g=tmp_g,
                     kernel_is_fft=True)

        _complex_divide_inplace(data_g, tmp_g)

        fft_convolve(tmp_g, hflip_g,
                     inplace=True,
                     kernel_is_fft=True)

        _complex_multiply_inplace(u_g, tmp_g)

    return u_g

示例#50

0

显示文件

文件： test_fft_accur.py 项目： maweigert/biobeam

def get_gpu(N=256, niter=100, sig=1.):
    """Track the relative error of repeated forward/inverse GPU FFT pairs.

    :param N: side length of the square complex64 test array
    :param niter: number of forward+inverse round trips
    :param sig: standard deviation of the normally distributed test data
    :return: array of length ``niter`` with, after each round trip,
             max|a - b| / max|a| for the original ``a`` and the buffer ``b``
    """
    np.random.seed(0)  # fixed seed, so the test data is reproducible
    a = np.random.normal(0, sig, (N, N)).astype(np.complex64)
    working_copy = (1. * a.copy()).astype(np.complex64)

    scratch_g = OCLArray.empty_like(working_copy)
    signal_g = OCLArray.from_array(working_copy)
    plan = fft_plan((N, N), fast_math=False)

    rel_errors = []
    for _ in range(niter):
        # forward into the scratch buffer, inverse back into the signal
        fft(signal_g, res_g=scratch_g, plan=plan)
        fft(scratch_g, res_g=signal_g, inverse=True, plan=plan)

        max_deviation = np.amax(np.abs(a - signal_g.get()))
        max_reference = np.amax(np.abs(a))
        rel_errors.append(max_deviation / max_reference)

    return np.array(rel_errors)

示例#51

0

显示文件

文件： gpu_structure.py 项目： adibrov/RunForestGUI

def gpu_structure(data):
    """Run the "structure" kernel over ``data`` and return its output.

    The input is cast to float32. The output buffer is float32 with shape
    ``(data.shape[0], data.shape[1], 2)``.
    """
    input_g = OCLArray.from_array(data.astype(float32))

    out_shape = (data.shape[0], data.shape[1], 2)
    output_g = OCLArray.empty(out_shape, float32)

    program = OCLProgram("./OpenCL/gpu_kernels/gpu_structure.cl")

    # global size is the reversed input shape; local size is left to None
    global_size = input_g.shape[::-1]
    program.run_kernel("structure",
                       global_size,
                       None,
                       input_g.data, output_g.data)

    return output_g.get()

示例#52

0

显示文件

文件： test_fft_accur.py 项目： maweigert/biobeam

def get_gpu(N = 256, niter=100, sig = 1.):
    """Measure how the error grows over repeated GPU FFT round trips.

    :param N: side length of the square complex64 test array
    :param niter: number of forward+inverse transforms to run
    :param sig: standard deviation of the random test data
    :return: numpy array with one relative max-error value per round trip
    """
    np.random.seed(0)
    a = np.random.normal(0, sig, (N, N)).astype(np.complex64)
    b = (1. * a.copy()).astype(np.complex64)

    c_g = OCLArray.empty_like(b)
    b_g = OCLArray.from_array(b)
    p = fft_plan((N, N), fast_math=False)

    def _round_trip_error():
        # one forward + inverse pass, then the error relative to max|a|
        fft(b_g, res_g=c_g, plan=p)
        fft(c_g, res_g=b_g, inverse=True, plan=p)
        return np.amax(np.abs(a - b_g.get())) / np.amax(np.abs(a))

    return np.array([_round_trip_error() for _ in range(niter)])

示例#53

0

显示文件

文件： gpu_mean.py 项目： adibrov/RunForestGUI

def gpu_mean(data, Nx=10,Ny=10):
    """Run the "mean" kernel over ``data`` and return its output.

    :param data: input array, cast to float32 first
    :param Nx: passed to the kernel as int32
    :param Ny: passed to the kernel as int32
    :return: float32 result with the same shape as ``data``
    """
    input_g = OCLArray.from_array(data.astype(float32))
    output_g = OCLArray.empty(data.shape, float32)

    program = OCLProgram("./OpenCL/gpu_kernels/gpu_mean.cl")

    # global size is the reversed input shape; local size is left to None
    global_size = input_g.shape[::-1]
    kernel_args = (input_g.data, output_g.data, int32(Nx), int32(Ny))
    program.run_kernel("mean", global_size, None, *kernel_args)

    return output_g.get()

示例#54

0

显示文件

文件： test_fft_accur.py 项目： maweigert/biobeam

def test_parseval():
    """Record sum(|d|^2) over repeated FFT round trips, on GPU and in numpy.

    :return: ``(s1, s2)`` - the per-iteration sums for the GPU transforms
             and for ``np.fft.fftn`` / ``np.fft.ifftn`` respectively
    """
    Nx = 512
    Nz = 100
    d = np.random.uniform(-1, 1, (Nx, Nx)).astype(np.complex64)
    d_g = OCLArray.from_array(d.astype(np.complex64))

    gpu_sums = []
    cpu_sums = []

    # GPU: forward and inverse transform in place, then sum |d|^2
    for step in range(Nz):
        print(step)
        fft(d_g, inplace=True, fast_math=False)
        fft(d_g, inverse=True, inplace=True, fast_math=False)
        gpu_sums.append(np.sum(np.abs(d_g.get())**2))

    # numpy reference, cast back to complex64 after every transform
    for step in range(Nz):
        print(step)
        d = np.fft.fftn(d).astype(np.complex64)
        d = np.fft.ifftn(d).astype(np.complex64)
        cpu_sums.append(np.sum(np.abs(d)**2))

    return gpu_sums, cpu_sums

示例#55

0

显示文件

文件： test_fft_accur.py 项目： maweigert/biobeam

def test_parseval():
    """Compare sum(|d|^2) across FFT round trips on the GPU and in numpy.

    :return: two lists with one sum per iteration: GPU first, numpy second
    """
    side, n_rounds = 512, 100
    host = np.random.uniform(-1, 1, (side, side)).astype(np.complex64)
    dev = OCLArray.from_array(host.astype(np.complex64))

    s1, s2 = [], []

    # round trips on the GPU buffer, transformed in place
    for i in range(n_rounds):
        print(i)
        for is_inverse in (False, True):
            if is_inverse:
                fft(dev, inverse=True, inplace=True, fast_math=False)
            else:
                fft(dev, inplace=True, fast_math=False)
        magnitude = np.abs(dev.get())
        s1.append(np.sum(magnitude**2))

    # same round trips with numpy, cast to complex64 after each transform
    for i in range(n_rounds):
        print(i)
        spectrum = np.fft.fftn(host).astype(np.complex64)
        host = np.fft.ifftn(spectrum).astype(np.complex64)
        s2.append(np.sum(np.abs(host)**2))

    return s1, s2

示例#56

0

显示文件

文件： oclmultireduction.py 项目： spaghettisort/gputools

    def time_multi(N, nargs, niter =100):
        """Time one OCLReductionKernel2 that evaluates ``nargs`` reductions.

        :param N: length of each all-ones float32 input array
        :param nargs: number of inputs / map expressions (``i*x_i[i]``)
        :param niter: number of kernel launches to average over
        :return: mean wall-clock time per launch in seconds

        Prints the reduction results and the mean time in milliseconds.
        Uses ``range`` and single-argument ``print(...)`` so it runs, with
        identical output, on both Python 2 and Python 3.
        """
        map_exprs = ["%s*x%s[i]" % (i, i) for i in range(nargs)]
        arguments = ",".join("__global float *x%s" % i for i in range(nargs))

        k = OCLReductionKernel2(np.float32,
                                neutral="0", reduce_expr="a+b",
                                map_exprs=map_exprs,
                                arguments=arguments)

        ins = [OCLArray.from_array(np.ones(N, np.float32)) for _ in map_exprs]
        outs = [OCLArray.empty(1, np.float32) for _ in map_exprs]

        from time import time
        t = time()
        for _ in range(niter):
            k(*ins, outs=outs)
        # wait for the queued work to finish before stopping the clock
        get_device().queue.finish()
        t = (time() - t) / niter

        results = [float(out.get()) for out in outs]
        print("multi reduction: result = %s" % (results,))
        print("multi reduction:\t\t%.2f ms" % (1000 * t))
        return t

示例#57

0

显示文件

文件： transformations.py 项目： spaghettisort/gputools

def affine(data, mat = np.identity(4), mode ="linear"):
    """Apply an affine transform given by ``mat`` to ``data``.

    :param data: input array, loaded into an OCLImage
    :param mat: transformation matrix; its inverse (as float32) is what the
                kernel receives
    :param mode: "linear" or "nearest"; selects the kernel build option
    :return: float32 result with the same shape as ``data``
    :raises KeyError: if ``mode`` is not one of the known modes
    """
    build_flags = {"linear":"","nearest":"-D USENEAREST"}

    if mode not in build_flags.keys():
        raise KeyError("mode = '%s' not defined ,valid: %s"%(mode, build_flags.keys()))

    image_in = OCLImage.from_array(data)
    result_g = OCLArray.empty(data.shape, np.float32)

    inverse_mat = np.linalg.inv(mat).astype(np.float32, copy=False)
    inverse_mat_g = OCLArray.from_array(inverse_mat)

    kernel_path = abspath("kernels/transformations.cl")
    program = OCLProgram(kernel_path, build_options=[build_flags[mode]])

    # global size is the reversed data shape
    program.run_kernel("affine",
                       data.shape[::-1], None,
                       image_in, result_g.data, inverse_mat_g.data)

    return result_g.get()

示例#58

0

显示文件

文件： convolve.py 项目： DerThorsten/gputools

def _convolve3_old(data,h, dev = None):
    """Convolve 3d ``data`` with kernel ``h`` on the GPU.

    Boundary conditions are clamping to edge. ``h`` is converted to float32.

    :param data: input array of dtype float32 or uint16
    :param h: convolution kernel
    :param dev: device; when None the one from ``get_device()`` is looked
                up. It is only checked for existence, not used afterwards.
    :return: float32 result with the same shape as ``data``
    :raises ValueError: if no device is available
    :raises TypeError: if the dtype of ``data`` is not supported
    """
    device = get_device() if dev is None else dev
    if device is None:
        raise ValueError("no OpenCLDevice found...")

    dtype = data.dtype.type

    # build option per supported input dtype
    dtypes_options = {np.float32:"",
                      np.uint16:"-D SHORTTYPE"}

    if dtype not in dtypes_options.keys():
        raise TypeError("data type %s not supported yet, please convert to:"%dtype,dtypes_options.keys())

    kernel_path = abspath("kernels/convolve3.cl")
    prog = OCLProgram(kernel_path, build_options=dtypes_options[dtype])

    kernel_g = OCLArray.from_array(h.astype(np.float32))
    image = OCLImage.from_array(data)
    result_g = OCLArray.empty(data.shape, dtype=np.float32)

    # the data extents followed by the kernel extents, each as int32
    extents = [np.int32(n) for n in data.shape + h.shape]

    prog.run_kernel("convolve3d", image.shape, None,
                    image, kernel_g.data, result_g.data,
                    *extents)

    return result_g.get()

示例#59

0

显示文件

文件： oclmultireduction.py 项目： spaghettisort/gputools

    def time_simple(N, nargs, niter =100):
        """Time ``nargs`` separate OCLReductionKernel launches per iteration.

        :param N: length of each all-ones float32 input array
        :param nargs: number of kernels; kernel ``i`` maps ``i*x[i]``
        :param niter: number of iterations to average over
        :return: mean wall-clock time per iteration in seconds

        Prints the reduction results and the mean time in milliseconds.
        Uses ``range`` and single-argument ``print(...)`` so it runs, with
        identical output, on both Python 2 and Python 3.
        """
        from gputools import OCLReductionKernel

        map_exprs = ["%s*x[i]" % i for i in range(nargs)]

        # one kernel per map expression
        ks = [OCLReductionKernel(np.float32,
                                 neutral="0", reduce_expr="a+b",
                                 map_expr=map_expr,
                                 arguments="__global float *x") for map_expr in map_exprs]

        ins = [OCLArray.from_array(np.ones(N, np.float32)) for _ in map_exprs]
        outs = [OCLArray.empty(1, np.float32) for _ in map_exprs]

        from time import time
        t = time()
        for _ in range(niter):
            for k, inn, out in zip(ks, ins, outs):
                k(inn, out=out)
        # wait for the queued work to finish before stopping the clock
        get_device().queue.finish()
        t = (time() - t) / niter

        results = [float(out.get()) for out in outs]
        print("simple reduction: result = %s" % (results,))
        print("simple reduction:\t\t%.2f ms" % (1000 * t))
        return t