Exemplo n.º 1
0
def prepare_video_cuda():
    """Populate ``cs`` with the plans, buffers, kernels and filters for video.

    Every entry is created once here so that later processing can reuse the
    same FFT plans and GPU buffers instead of re-creating them per block.
    ``cs``, ``blocklen``, ``mod``, ``SysParams`` and ``FFTtoGPU`` are taken
    from the enclosing module.
    """
    real32 = np.float32
    cplx64 = np.complex64
    # Length of the 'fft1_out' / 'fft2_out' buffers.
    half_spectrum = (blocklen // 2) + 1

    # Things go *a lot* faster when you have the memory structures
    # pre-allocated.
    cs['plan1'] = fft.Plan(blocklen, real32, cplx64)
    cs['plan1i'] = fft.Plan(blocklen, cplx64, cplx64)

    cs['fft1_out'] = gpuarray.empty(half_spectrum, cplx64)

    cs['filtered1'] = gpuarray.empty(blocklen, cplx64)
    cs['fm_demod'] = gpuarray.empty(blocklen, real32)

    cs['postlpf'] = gpuarray.empty(blocklen, real32)

    cs['fft2_out'] = gpuarray.empty(half_spectrum, cplx64)

    cs['clipped_gpu'] = gpuarray.empty(blocklen, np.uint16)

    cs['plan2'] = fft.Plan(blocklen, real32, cplx64)
    cs['plan2i'] = fft.Plan(blocklen, cplx64, real32)

    # CUDA functions.  The fewer setups we need, the faster it goes.
    for key, kernel_name in (('doclamp16', "clamp16"),
                             ('doanglediff', "anglediff")):
        cs[key] = mod.get_function(kernel_name)

    # GPU-stored frequency-domain filters
    for key, param in (('filt_post', 'fft_post'),
                       ('filt_video', 'fft_video'),
                       ('filt_video_inner', 'fft_video_inner')):
        cs[key] = FFTtoGPU(SysParams[param])
Exemplo n.º 2
0
def prepare_audio_cuda():
    """Populate ``cs`` with the plans, buffers, kernels and filters for audio.

    Two lengths are involved: ``blocklen`` for the first forward transform
    and ``ablocklen`` for everything after it. The scaled output buffers
    hold ``ablocklen // 20`` samples per channel. ``cs``, ``blocklen``,
    ``ablocklen``, ``mod``, ``SysParams`` and ``FFTtoGPU`` are taken from the
    enclosing module.
    """
    real32 = np.float32
    cplx64 = np.complex64
    block_half = blocklen // 2 + 1
    ablock_half = ablocklen // 2 + 1

    cs['plan1'] = fft.Plan(blocklen, real32, cplx64)
    cs['plan1i'] = fft.Plan(ablocklen, cplx64, cplx64)

    cs['fft1_out'] = gpuarray.empty(blocklen, cplx64)
    cs['ifft1_out'] = gpuarray.empty(ablocklen, cplx64)

    # Per-channel working buffers (left first, then right).
    cs['fm_left'] = gpuarray.empty(ablocklen, cplx64)
    cs['fm_right'] = gpuarray.empty(ablocklen, cplx64)

    cs['left_clipped'] = gpuarray.empty(ablocklen, real32)
    cs['right_clipped'] = gpuarray.empty(ablocklen, real32)

    cs['left_fft1'] = gpuarray.empty(block_half, cplx64)
    cs['right_fft1'] = gpuarray.empty(block_half, cplx64)
    cs['left_fft2'] = gpuarray.empty(ablock_half, cplx64)
    cs['right_fft2'] = gpuarray.empty(ablock_half, cplx64)

    cs['left_out'] = gpuarray.empty(ablocklen, real32)
    cs['right_out'] = gpuarray.empty(ablocklen, real32)

    cs['plan2'] = fft.Plan(ablocklen, real32, cplx64)
    cs['plan2i'] = fft.Plan(ablocklen, cplx64, real32)

    # Output length per channel; 'scaledout' holds both channels.
    outlen = ablocklen // 20
    cs['outlen'] = outlen
    cs['scaledout'] = gpuarray.empty(outlen * 2, real32)
    cs['left_scaledout'] = gpuarray.empty(outlen, real32)
    cs['right_scaledout'] = gpuarray.empty(outlen, real32)

    cs['doanglediff_mac'] = mod.get_function("anglediff_mac")
    cs['doaudioscale'] = mod.get_function("audioscale")

    cs['filt_audiolpf'] = FFTtoGPU(SysParams['fft_audiolpf'])
    cs['filt_audio_left'] = FFTtoGPU(SysParams['fft_audio_left'])
    cs['filt_audio_right'] = FFTtoGPU(SysParams['fft_audio_right'])
def cufft_conv(x, y):
    """Multiply the FFTs of ``x`` and ``y`` on the GPU and inverse-transform.

    Both inputs are converted to ``complex64`` first. No zero padding is
    applied, so the transforms have exactly the shape of the inputs.

    Returns the inverse transform (scaled) of the element-wise product as a
    host array, or ``-1`` when the two shapes differ.
    """
    a = x.astype(np.complex64)
    b = y.astype(np.complex64)

    if a.shape != b.shape:
        return -1

    forward_plan = fft.Plan(a.shape, np.complex64, np.complex64)
    backward_plan = fft.Plan(a.shape, np.complex64, np.complex64)

    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)

    a_hat = gpuarray.empty_like(a_gpu, dtype=np.complex64)
    b_hat = gpuarray.empty_like(b_gpu, dtype=np.complex64)
    result_gpu = gpuarray.empty_like(a_gpu, dtype=np.complex64)

    fft.fft(a_gpu, a_hat, forward_plan)
    fft.fft(b_gpu, b_hat, forward_plan)

    # The product is read back from the second operand, which is where this
    # code expects `overwrite=True` to leave it.
    linalg.multiply(a_hat, b_hat, overwrite=True)
    fft.ifft(b_hat, result_gpu, backward_plan, scale=True)
    host_result = result_gpu.get()

    # Release the device buffers explicitly, in allocation order.
    for device_buffer in (a_gpu, b_gpu, a_hat, b_hat, result_gpu):
        device_buffer.gpudata.free()

    return host_result
Exemplo n.º 4
0
    def propagate_eager(self, wavelength, wavefront):
        """
        Eager GPU propagation: pupil -> image slicer -> pupil mirror -> slits.

        Data is moved back and forth between host and device between stages,
        and the free GPU memory is printed after each one. Marked upstream
        as memory-hungry and due for removal.

        :param wavelength: key into ``self.pupil_masks``; also divides the
            phase term of the complex pupil
        :param wavefront: not read by this implementation
            (NOTE(review): the phase is built from the pupil mask itself --
            confirm whether the wavefront was meant to be used)
        :return: fftshift-ed sum over the slice axis of the squared modulus
            of the field at the slits
        """

        def report_free_memory(fmt):
            # Print the share of GPU memory that is currently free.
            free, total = cuda.mem_get_info()
            print(fmt % (free / total * 100))

        N = self.N_PIX
        report_free_memory("Free: %.2f percent")

        # Pupil Plane -> Image Slicer
        pupil_mask = self.pupil_masks[wavelength]
        phase = 1j * 2 * np.pi * self.pupil_masks[wavelength] / wavelength
        complex_pupil = pupil_mask * np.exp(phase)
        pupil_gpu = gpuarray.to_gpu(np.asarray(complex_pupil, np.complex64))
        pupil_plan = cu_fft.Plan(pupil_gpu.shape, np.complex64, np.complex64)
        cu_fft.fft(pupil_gpu, pupil_gpu, pupil_plan, scale=True)

        # Bring the field back to the host so it can be replicated once per
        # slice, and release the single-field device buffer.
        slicer_field = pupil_gpu.get()
        pupil_gpu.gpudata.free()

        report_free_memory("*Free: %.2f percent")

        stacked_field = np.stack([slicer_field] * self.N_slices)
        field_gpu = gpuarray.to_gpu(stacked_field)
        slicer_masks_gpu = gpuarray.to_gpu(self.slicer_masks_fftshift)
        clinalg.multiply(slicer_masks_gpu, field_gpu, overwrite=True)
        slicer_masks_gpu.gpudata.free()
        report_free_memory("**Free: %.2f percent")

        # Slicer -> Pupil Mirror (batched over the slices)
        batch_plan = cu_fft.Plan((N, N), np.complex64, np.complex64,
                                 self.N_slices)
        cu_fft.ifft(field_gpu, field_gpu, batch_plan, scale=True)
        mirror_mask_gpu = gpuarray.to_gpu(self.pupil_mirror_masks_fft)
        clinalg.multiply(mirror_mask_gpu, field_gpu, overwrite=True)

        # Pupil Mirror -> Slits
        cu_fft.fft(field_gpu, field_gpu, batch_plan)
        slits = field_gpu.get()
        field_gpu.gpudata.free()
        mirror_mask_gpu.gpudata.free()
        intensity = np.sum((np.abs(slits))**2, axis=0)
        slit = fftshift(intensity)

        report_free_memory("***Free: %.2f percent")

        return slit
Exemplo n.º 5
0
    def __init__(self, nx, ny):
        """Create the real<->complex FFT plans for an ``ny`` x ``nx`` grid.

        ``shapeX`` is the real-space shape ``[ny, nx]`` and ``shapeK`` the
        half-spectrum shape ``[ny, nx // 2 + 1]``. ``coef_norm`` stores
        ``nx * ny``.
        """
        self.shapeX = [ny, nx]
        self.shapeK = [ny, nx // 2 + 1]

        # Both plans are built from the real-space shape.
        real_t, complex_t = np.float32, np.complex64
        self.fftplan = skfft.Plan(self.shapeX, real_t, complex_t)
        self.ifftplan = skfft.Plan(self.shapeX, complex_t, real_t)

        self.coef_norm = nx * ny
Exemplo n.º 6
0
    def __init__(self, mesh, context=None):
        '''
        Allocate the GPU work buffers, the device-to-device copy helpers and
        the cuFFT plans, then compute and store the transformed Green's
        function for ``mesh``.

        Args:
            mesh The mesh on which the solver will operate. The dimensionality
                 is deducted from mesh.dimension
            context Stored unchanged on the instance as ``_context``; it is
                 not used otherwise in this constructor.
        '''
        # create the mesh grid and compute the greens function on it
        self.mesh = mesh
        self._context = context
        mesh_shape = self.mesh.shape # nz, ny, (nx)
        mesh_shape2 = [2*n for n in mesh_shape] # 2*nz, 2*ny, (2*nx)
        mesh_distances = list(reversed(self.mesh.distances)) #dz, dy, dx
        # Both buffers have twice the mesh extent along every axis.
        self.fgreentr = gpuarray.empty(mesh_shape2,
                        dtype=np.complex128)
        self.tmpspace = gpuarray.zeros_like(self.fgreentr)
        sizeof_complex = np.dtype(np.complex128).itemsize

        # dimensionality function dispatch
        dim = self.mesh.dimension
        self._fgreen = getattr(self, '_fgreen' + str(dim) + 'd')
        self._mirror = getattr(self, '_mirror' + str(dim) + 'd')
        copy_fn = {'3d' : get_Memcpy3D_d2d, '2d': get_Memcpy2D_d2d}
        memcpy_nd = copy_fn[str(dim) + 'd']
        dim_args = self.mesh.shape
        # Copy helper from a mesh-sized array into the doubled buffer.
        self._cpyrho2tmp = memcpy_nd(
            src=None, dst=self.tmpspace, # None because src(rho) not yet known
            src_pitch=self.mesh.nx*sizeof_complex,
            dst_pitch=2*self.mesh.nx*sizeof_complex,
            dim_args=dim_args,
            itemsize=sizeof_complex,
            src_height=self.mesh.ny,
            dst_height=2*self.mesh.ny)
        # Copy helper for the opposite direction.
        self._cpytmp2rho = memcpy_nd(
            src=self.tmpspace, dst=None, # None because dst(rho) not yet known
            src_pitch=2*self.mesh.nx*sizeof_complex,
            dst_pitch=self.mesh.nx*sizeof_complex,
            dim_args=dim_args,
            itemsize=sizeof_complex,
            src_height=2*self.mesh.ny,
            dst_height=self.mesh.ny)
        # `range` instead of the Python-2-only `xrange`, so the constructor
        # also runs on Python 3.
        mesh_arr = [-mesh_distances[i]/2 + np.arange(mesh_shape[i]+1)
                                            * mesh_distances[i]
                    for i in range(self.mesh.dimension)
                   ]
        # mesh_arr is [mz, my, mx]
        mesh_grids = np.meshgrid(*mesh_arr, indexing='ij')
        fgreen = self._fgreen(*mesh_grids)
        fgreen = self._mirror(fgreen)
        self.plan_forward = cu_fft.Plan(self.tmpspace.shape, in_dtype=np.complex128,
                                        out_dtype=np.complex128)
        self.plan_backward = cu_fft.Plan(self.tmpspace.shape, in_dtype=np.complex128,
                                         out_dtype=np.complex128)
        # Keep only the transformed Green's function on the device.
        cu_fft.fft(gpuarray.to_gpu(fgreen), self.fgreentr, plan=self.plan_forward)
Exemplo n.º 7
0
    def set_focal_series(self, fs, defoci=None):
        """
        Setting the focal series also sets important parameters as the padding
        and defocus values.

        Stores on the instance: ``zdim``, ``k2``, ``Nz``, ``new_wave``,
        ``shape``, ``Iexp`` and ``mask``; when ``self.using_gpu`` is true
        also the cuFFT plans ``pft3dcc`` (batched over the images) and
        ``pft2dcc`` (single image).

        Parameters
        ----------
        fs : Hyperspy Image
         Mandatory! A focal series with all the appropriate parameters. Padding
         is read from metadata.

        defoci : array
         Optional, use it to explicitly set defoci values. Must have a dimension
         equal to the navigation axis of fs.
         """
        # Real space parameters
        Nz, Ny, Nx = fs.data.shape
        if defoci is None:
            defoci = fs.axes_manager.navigation_axes[0].axis

        # The number of planes follows the defocus list, not the data shape.
        Nz = len(defoci)
        self.zdim = defoci
        self.k2 = fs.get_fourier_space()
        self.Nz = Nz

        # In the middle of the focal series is the reference plane
        Nz_half = np.ceil(Nz / 2).astype('int') - 1

        # Pad mask: True inside the unpadded region of every image.
        # Builtin `bool` is used because the `np.bool` alias was removed from
        # NumPy (1.24) and raises AttributeError there.
        Npy, Npx = fs.metadata.Signal.pad_tuple
        mask = np.zeros((Nz, Ny, Nx), dtype=bool)
        mask[:, Npy[0]:(Ny - Npy[1]), Npx[0]:(Nx - Npx[1])] = True

        # Set some parameters
        self.new_wave = fs._get_signal_signal()
        self.new_wave.metadata = fs.metadata.deepcopy()
        self.shape = (Nz, Ny, Nx)

        # Allocate things ...
        if self.using_gpu:
            # ... in GPU!
            self.Iexp = to_gpu_f(fs.data)
            self.mask = to_gpu_b(mask)

            # - The plans for FFT
            self.pft3dcc = cu_fft.Plan((Ny, Nx), np.complex64, np.complex64,
                                       Nz)
            self.pft2dcc = cu_fft.Plan((Ny, Nx), np.complex64, np.complex64)
        else:
            # ... in CPU!
            self.Iexp = fs.data.astype('complex64')
            self.mask = mask
Exemplo n.º 8
0
    def filter(self):
        """Filter ``self.series[0]`` with ``self.series[1]`` through GPU FFTs.

        Both inputs are zero padded to ``determine_size(len(signal) +
        len(window) - 1)``, transformed, multiplied element-wise, scaled by
        2.0 and inverse transformed. The dtypes come from
        ``self.precision['float']`` and ``self.precision['complex']``.

        A fresh CUDA context is created for the call and is always popped
        again, even when one of the GPU steps raises.

        Returns a host array of the complex precision type holding the
        inverse transform.
        """
        import pycuda.gpuarray as gpuarray
        import skcuda.fft as cu_fft
        import skcuda.linalg as linalg
        import pycuda.driver as cuda
        from pycuda.tools import make_default_context
        cuda.init()
        context = make_default_context()
        try:
            signal = self.series[0]
            window = self.series[1]
            linalg.init()
            nfft = determine_size(len(signal) + len(window) - 1)
            # Move data to GPU
            sig_zero_pad = np.zeros(nfft, dtype=self.precision['float'])
            win_zero_pad = np.zeros(nfft, dtype=self.precision['float'])
            sig_gpu = gpuarray.zeros(sig_zero_pad.shape,
                                     dtype=self.precision['float'])
            win_gpu = gpuarray.zeros(win_zero_pad.shape,
                                     dtype=self.precision['float'])
            sig_zero_pad[0:len(signal)] = signal
            win_zero_pad[0:len(window)] = window
            sig_gpu.set(sig_zero_pad)
            win_gpu.set(win_zero_pad)

            # Plan forwards
            sig_fft_gpu = gpuarray.zeros(nfft, dtype=self.precision['complex'])
            win_fft_gpu = gpuarray.zeros(nfft, dtype=self.precision['complex'])
            sig_plan_forward = cu_fft.Plan(sig_fft_gpu.shape,
                                           self.precision['float'],
                                           self.precision['complex'])
            win_plan_forward = cu_fft.Plan(win_fft_gpu.shape,
                                           self.precision['float'],
                                           self.precision['complex'])
            cu_fft.fft(sig_gpu, sig_fft_gpu, sig_plan_forward)
            cu_fft.fft(win_gpu, win_fft_gpu, win_plan_forward)

            # Convolve
            out_fft = linalg.multiply(sig_fft_gpu, win_fft_gpu,
                                      overwrite=True)
            linalg.scale(2.0, out_fft)

            # Plan inverse
            out_gpu = gpuarray.zeros_like(out_fft)
            plan_inverse = cu_fft.Plan(out_fft.shape,
                                       self.precision['complex'],
                                       self.precision['complex'])
            cu_fft.ifft(out_fft, out_gpu, plan_inverse, True)
            out_np = np.zeros(len(out_gpu), self.precision['complex'])
            out_gpu.get(out_np)
            return out_np
        finally:
            # Without this the context stays on the stack after an error.
            context.pop()
Exemplo n.º 9
0
def cuda_efftn(H, axes, forward):
    """Run a batched FFT of ``H`` over ``axes`` on the GPU.

    The transformed axes are moved to the end, all remaining axes are
    flattened into one batch dimension, the transform runs in place on the
    device, and the original axis layout is restored afterwards.

    Parameters
    ----------
    H : numpy array
        Input data. Its dtype is used for both the plan input and output, so
        it is expected to be complex.
    axes : sequence of int
        Axes to transform. Any number of axes is accepted here (the previous
        version only worked for exactly three leading axes).
    forward : bool
        True for the forward transform, False for the scaled inverse.

    Returns
    -------
    numpy array with the same shape as ``H``.
    """
    hShape = H.shape
    hDim = len(hShape)
    axes = list(axes)
    fftDim = len(axes)

    # Trailing positions that the transformed axes are moved to.
    tail = list(range(hDim - fftDim, hDim))

    # Move 'axes' to the array's end dimensions and ensure contiguity
    moved = np.ascontiguousarray(np.moveaxis(H, axes, tail))
    movedShape = moved.shape

    # Calculate number of batches
    batchSize = 1
    for extent in movedShape[:hDim - fftDim]:
        batchSize *= extent

    # Reshape to accommodate batching: (batch, *fft_shape)
    fftShape = tuple(movedShape[hDim - fftDim:])
    batched = np.reshape(moved, (batchSize,) + fftShape)

    # Pass array to the GPU and transform every batch in place
    H_gpu = gpuarray.to_gpu(batched)
    plan = skfft.Plan(fftShape, moved.dtype, moved.dtype, batchSize)

    if forward:
        skfft.fft(H_gpu, H_gpu, plan)
    else:
        skfft.ifft(H_gpu, H_gpu, plan, True)

    # Undo the batching, then move the transformed axes back where they
    # were, so the result lines up with the input for any choice of 'axes'.
    result = np.reshape(H_gpu.get(), movedShape)
    return np.ascontiguousarray(np.moveaxis(result, tail, axes))
Exemplo n.º 10
0
def ifft2_gpu(y, fftshift=False):
    """
    C2C iFFT
    do numpy.fft.ifft2

    The input y is a 2D numpy array. It is converted to a C-contiguous
    complex128 array before upload, because the plan below is
    complex128 -> complex128 and the device buffer must match it.

    If ``fftshift`` is true, ``numpy.fft.ifftshift`` is applied to the input
    before the transform.

    Returns the normalised inverse transform as a complex128 host array.
    """

    #get the shape of the initial numpy array
    n1, n2 = y.shape

    # For C2C, the dimensions of input and output are the same.
    if fftshift:
        y2 = np.fft.ifftshift(y)
    else:
        y2 = y
    # Match the plan's input type; a complex64 or real input would otherwise
    # be uploaded as-is and read with the wrong element size.
    y2 = np.ascontiguousarray(y2, dtype=np.complex128)
    ygpu = gpuarray.to_gpu(y2)

    #Initialise empty output GPUarray
    x = gpuarray.empty((n1, n2), np.complex128)

    #inverse FFT
    plan_backward = cu_fft.Plan((n1, n2), np.complex128, np.complex128)
    cu_fft.ifft(ygpu, x, plan_backward)

    #Must divide by the total number of pixels in the image to get the normalization right
    xout = x.get() / n1 / n2

    return xout
def ifft2_gpu(y, fftshift=False):
    '''Complex-to-real inverse 2D FFT on the GPU.

    Intended as a stand-in for numpy.fft.ifft2 when the result is real.
    The input y is a 2D complex numpy array; it is converted to complex64
    and only its first n2 // 2 + 1 columns are sent to the GPU. When
    fftshift is anything other than the literal False, numpy.fft.ifftshift
    is applied first. The result is a float array of shape (n1, n2).'''

    # Work in single precision
    spectrum = y if y.dtype == 'complex64' else y.astype('complex64')

    # Shape of the full (real-space) image
    n1, n2 = spectrum.shape
    kept_columns = n2 // 2 + 1

    # Keep only the non redundant FFT coefficients
    if fftshift is False:
        source = spectrum
    else:
        source = np.fft.ifftshift(spectrum)
    half_spectrum = np.asarray(source[:, :kept_columns], np.complex64)
    spectrum_gpu = gpuarray.to_gpu(half_spectrum)

    # Output buffer for the real image
    image_gpu = gpuarray.empty((n1, n2), np.float32)

    # Inverse FFT
    inverse_plan = cu_fft.Plan((n1, n2), np.complex64, np.float32)
    cu_fft.ifft(spectrum_gpu, image_gpu, inverse_plan)

    # Divide by the total number of pixels to get the normalisation right
    return image_gpu.get() / n1 / n2
Exemplo n.º 12
0
 def setup_mesh(self, mesh):
     '''Build the mesh grid and store the transformed integrated Green's
     function computed from the mesh distances.

     Only accepts meshes with the same shape as self.mesh . With
     self.save_memory set, a single transformed 2D slice is stored;
     otherwise the slice is repeated mesh.nz times before transforming.
     '''
     assert (mesh.shape == self.mesh.shape)
     self.mesh = mesh

     # Grid coordinates, built for index 1 first and then index 0
     # (labelled my, mx in the original).
     mesh_arr = []
     for i in [1, 0]:
         spacing = mesh.distances[i]
         mesh_arr.append(
             -mesh.distances[i]/2 + np.arange(mesh.shape_r[i] + 1.) * spacing)

     mesh_grids = np.meshgrid(*mesh_arr, indexing='ij') #choose my, mx
     green_slice = self._mirror(self._fgreen(*mesh_grids))

     if not self.save_memory:
         # Repeat the slice nz times on the host; tiling the full 3D array
         # in one go was reported upstream to exhaust memory.
         stacked = np.empty(shape=(mesh.nz, 2*mesh.ny, 2*mesh.nx),
             dtype=np.complex128)
         stacked[:, :, :] = green_slice
         cu_fft.fft(gpuarray.to_gpu(stacked), self.fgreentr,
             plan=self.plan_forward)
     else:
         slice_plan = cu_fft.Plan([2*self.mesh.ny, 2*self.mesh.nx],
             in_dtype=np.complex128, out_dtype=np.complex128)
         cu_fft.fft(gpuarray.to_gpu(green_slice), self.fgreentr,
             plan=slice_plan)
Exemplo n.º 13
0
def do_ffts(img):
    """Time a 2D FFT of ``img`` on the GPU and on the CPU.

    The image is converted to float32 if needed. The GPU timing covers
    upload, planning, execution and download.

    Returns ``(gpu_fft, cpu_fft, gpu_time, cpu_time)``. The GPU result only
    has ``sy // 2 + 1`` columns, while the CPU result comes from
    ``np.fft.fft2``.
    """
    image = img if img.dtype == 'float32' else img.astype('float32')

    # Beware of the (width x height) vs (rows x columns) convention here.
    sx, sy = image.shape

    ## GPU: upload, plan, run, download
    # See https://www.idtools.com.au/gpu-accelerated-fft-compatible-numpy/
    gpu_start = time.time()

    image_gpu = gpuarray.to_gpu(image)

    # N/2+1 non-redundant coefficients of a length-N input signal
    spectrum_gpu = gpuarray.empty((sx, sy // 2 + 1), np.complex64)

    forward_plan = cu_fft.Plan((sx, sy), np.float32, np.complex64)
    cu_fft.fft(image_gpu, spectrum_gpu, forward_plan)
    gpu_fft = spectrum_gpu.get()
    gpu_time = time.time() - gpu_start

    print(f'GPU FFT preparation, execution and retrieval in {gpu_time:6.4f} s')

    ## CPU reference
    cpu_start = time.time()
    cpu_fft = np.fft.fft2(image)
    cpu_time = time.time() - cpu_start
    print(f'NumPY CPU FFT in {cpu_time:6.4f} s')

    return (gpu_fft, cpu_fft, gpu_time, cpu_time)
Exemplo n.º 14
0
    def __init__(self, mesh, context=None, save_memory=True):
        '''
        Set up buffers, copy helpers and cuFFT plans for the 2.5D solver.

        Args:
            mesh The mesh on which the solver will operate. It must be 3D;
                 otherwise a message is printed and the constructor returns
                 early without initialising anything.
            context Stored unchanged on the instance as ``_context``.
            save_memory: Decide whether to store all slices of the transformed
                 greens function (more memory but faster) or save 1 slice only
                 (saves memory but slower, default)
        '''
        if mesh.dimension != 3:
            print ('Error: Use a 3d mesh for the 2.5d algorithm!. Abort.')
            return
        self.is_25D = True
        self.save_memory = save_memory

        self.mesh = mesh
        self._context = context
        nz, ny, nx = mesh.shape

        # One transformed slice (slow kernel) or one per z index (fast kernel)
        if save_memory:
            green_shape = (2*ny, 2*nx)
        else:
            green_shape = (nz, 2*ny, 2*nx)
        self.fgreentr = gpuarray.empty(green_shape, dtype=np.complex128)
        if save_memory:
            self._solve_kernel = self._solve_kernel_slow
        else:
            self._solve_kernel = self._solve_kernel_fast
        self.tmpspace = gpuarray.zeros((nz, 2*ny, 2*nx), dtype=np.complex128)
        sizeof_complex = np.dtype(np.complex128).itemsize

        # The 2.5D variant always uses these implementations
        self._fgreen = self._fgreen25d
        self._mirror = self._mirror2d

        # Device-to-device copy helpers between a mesh-sized array and the
        # doubled work buffer; the mesh-sized side is not known yet, hence
        # None.
        narrow_pitch = mesh.nx*sizeof_complex
        wide_pitch = 2*mesh.nx*sizeof_complex
        self._cpyrho2tmp = get_Memcpy3D_d2d(
            src=None, dst=self.tmpspace,
            src_pitch=narrow_pitch,
            dst_pitch=wide_pitch,
            dim_args=mesh.shape,
            itemsize=np.dtype(np.complex128).itemsize,
            src_height=mesh.ny,
            dst_height=2*mesh.ny)
        self._cpytmp2rho = get_Memcpy3D_d2d(
            src=self.tmpspace, dst=None,
            src_pitch=wide_pitch,
            dst_pitch=narrow_pitch,
            dim_args=mesh.shape,
            itemsize=np.dtype(np.complex128).itemsize,
            src_height=2*mesh.ny,
            dst_height=mesh.ny)

        # One batched 2D plan serves both directions
        self.plan_forward = cu_fft.Plan([2*mesh.ny, 2*mesh.nx],
            in_dtype=np.complex128, out_dtype=np.complex128, batch=mesh.nz)
        self.plan_backward = self.plan_forward

        self.setup_mesh(mesh)
Exemplo n.º 15
0
 def test_fft_float32_to_complex64_1d(self):
     """1D float32 -> complex64 GPU FFT agrees with ``np.fft.rfftn``."""
     host_signal = np.random.rand(self.N).astype(np.float32)
     expected = np.fft.rfftn(host_signal)

     signal_gpu = gpuarray.to_gpu(host_signal)
     # Output buffer of N // 2 + 1 coefficients
     spectrum_gpu = gpuarray.empty(self.N // 2 + 1, np.complex64)
     forward_plan = fft.Plan(host_signal.shape, np.float32, np.complex64)
     fft.fft(signal_gpu, spectrum_gpu, forward_plan)

     assert np.allclose(expected, spectrum_gpu.get(), atol=atol_float32)
Exemplo n.º 16
0
 def test_multiple_streams(self):
     """Two plans bound to different streams both give correct FFTs."""
     first = np.random.rand(self.N).astype(np.float32)
     first_expected = np.fft.rfftn(first)
     second = np.random.rand(self.N).astype(np.float32)
     second_expected = np.fft.rfftn(second)

     first_gpu = gpuarray.to_gpu(first)
     second_gpu = gpuarray.to_gpu(second)
     half_len = self.N // 2 + 1
     first_out = gpuarray.empty(half_len, np.complex64)
     second_out = gpuarray.empty(half_len, np.complex64)

     # One stream per plan
     stream0 = drv.Stream()
     stream1 = drv.Stream()
     first_plan = fft.Plan(first.shape, np.float32, np.complex64,
                           stream=stream0)
     second_plan = fft.Plan(second.shape, np.float32, np.complex64,
                            stream=stream1)
     fft.fft(first_gpu, first_out, first_plan)
     fft.fft(second_gpu, second_out, second_plan)

     assert np.allclose(first_expected, first_out.get(), atol=atol_float32)
     assert np.allclose(second_expected, second_out.get(), atol=atol_float32)
Exemplo n.º 17
0
 def test_ifft_complex128_to_float64_1d(self):
     """Scaled 1D complex128 -> float64 GPU inverse FFT recovers the input."""
     original = np.random.rand(self.N).astype(np.float64)
     spectrum = np.asarray(np.fft.rfftn(original), np.complex128)

     spectrum_gpu = gpuarray.to_gpu(spectrum)
     restored_gpu = gpuarray.empty(self.N, np.float64)
     # The plan is built from the real-space shape
     inverse_plan = fft.Plan(original.shape, np.complex128, np.float64)
     fft.ifft(spectrum_gpu, restored_gpu, inverse_plan, True)

     assert np.allclose(original, restored_gpu.get(), atol=atol_float64)
Exemplo n.º 18
0
 def test_batch_fft_float32_to_complex64_2d(self):
     """Batched 2D float32 -> complex64 GPU FFT agrees with ``np.fft.rfftn``."""
     batch_shape = (self.B, self.N, self.M)
     host_batch = np.random.rand(*batch_shape).astype(np.float32)
     # Reference: transform over the last two axes only
     expected = np.fft.rfftn(host_batch, axes=(1,2))

     batch_gpu = gpuarray.to_gpu(host_batch)
     spectrum_gpu = gpuarray.empty((self.B, self.N, self.M//2+1),
                                   np.complex64)
     batched_plan = fft.Plan([self.N, self.M], np.float32, np.complex64,
                             batch=self.B)
     fft.fft(batch_gpu, spectrum_gpu, batched_plan)

     np.testing.assert_allclose(expected, spectrum_gpu.get(),
                                atol=atol_float32)
Exemplo n.º 19
0
def _get_inv_plan(itype, otype, outlen, batch=1):
    """Return the cached inverse plan for the given types, length and batch.

    A plan is created on first use and stored in ``_reverse_plans`` under
    ``(itype, otype, outlen, batch)``. The key used for storing now matches
    the key used for lookup; previously the batch was left out when storing,
    so the cache never hit and a new plan was built on every call.
    """
    key = (itype, otype, outlen, batch)
    try:
        theplan = _reverse_plans[key]
    except KeyError:
        theplan = cu_fft.Plan((outlen, ), itype, otype, batch=batch)
        _reverse_plans[key] = theplan

    return theplan
Exemplo n.º 20
0
def _get_fwd_plan(itype, otype, inlen, batch=1):
    """Return the cached forward plan for the given types, length and batch.

    A plan is created on first use and stored in ``_forward_plans`` under
    ``(itype, otype, inlen, batch)``. The key used for storing now matches
    the key used for lookup; previously the batch was left out when storing,
    so the cache never hit and a new plan was built on every call.
    """
    key = (itype, otype, inlen, batch)
    try:
        theplan = _forward_plans[key]
    except KeyError:
        theplan = cu_fft.Plan((inlen, ), itype, otype, batch=batch)
        _forward_plans[key] = theplan

    return theplan
Exemplo n.º 21
0
 def test_batch_fft_float64_to_complex128_1d(self):
     """Batched 1D float64 -> complex128 GPU FFT agrees with ``np.fft.rfft``."""
     host_batch = np.random.rand(self.B, self.N).astype(np.float64)
     expected = np.fft.rfft(host_batch, axis=1)

     batch_gpu = gpuarray.to_gpu(host_batch)
     spectrum_gpu = gpuarray.empty((self.B, self.N // 2 + 1), np.complex128)
     # Transform length is the row length; rows are the batch
     row_length = host_batch.shape[1]
     batched_plan = fft.Plan(row_length, np.float64, np.complex128,
                             batch=self.B)
     fft.fft(batch_gpu, spectrum_gpu, batched_plan)

     assert np.allclose(expected, spectrum_gpu.get(), atol=atol_float64)
Exemplo n.º 22
0
def filter_fft_cuda(signal: np.array, window: np.array, prec: dict):
    """
    Filter ``signal`` with ``window`` through FFTs on the GPU (pycuda/skcuda).

    Both inputs are zero padded to ``determine_size(len(signal) +
    len(window) - 1)``, transformed, multiplied element-wise, scaled by 2.0
    and inverse transformed. Importing ``pycuda.autoinit`` inside the
    function initialises pycuda as part of the call.

    :param signal: The input series
    :param window: The input window
    :param prec: The precision entry; its 'float' and 'complex' items give
        the dtypes used
    :return: The filtered signal, as a host array of the complex dtype
    """
    import pycuda.autoinit  # Here because it initialises a new cuda environment every trial.
    import pycuda.gpuarray as gpuarray
    import skcuda.fft as cu_fft
    import skcuda.linalg as linalg
    linalg.init()

    real_t = prec['float']
    complex_t = prec['complex']
    nfft = determine_size(len(signal) + len(window) - 1)

    # Zero padded host copies of both inputs
    padded_signal = np.zeros(nfft, dtype=real_t)
    padded_window = np.zeros(nfft, dtype=real_t)
    padded_signal[0:len(signal)] = signal
    padded_window[0:len(window)] = window

    # Move data to GPU
    signal_gpu = gpuarray.zeros(padded_signal.shape, dtype=real_t)
    window_gpu = gpuarray.zeros(padded_window.shape, dtype=real_t)
    signal_gpu.set(padded_signal)
    window_gpu.set(padded_window)

    # Forward transforms into full-length complex buffers
    signal_hat = gpuarray.zeros(nfft, dtype=complex_t)
    window_hat = gpuarray.zeros(nfft, dtype=complex_t)
    signal_plan = cu_fft.Plan(signal_hat.shape, real_t, complex_t)
    window_plan = cu_fft.Plan(window_hat.shape, real_t, complex_t)
    cu_fft.fft(signal_gpu, signal_hat, signal_plan)
    cu_fft.fft(window_gpu, window_hat, window_plan)

    # Convolve: multiply the spectra, then scale the product in place
    product_hat = linalg.multiply(signal_hat, window_hat, overwrite=True)
    linalg.scale(2.0, product_hat)

    # Inverse transform and copy back to the host
    result_gpu = gpuarray.zeros_like(product_hat)
    inverse_plan = cu_fft.Plan(product_hat.shape, complex_t, complex_t)
    cu_fft.ifft(product_hat, result_gpu, inverse_plan, True)
    result = np.zeros(len(result_gpu), complex_t)
    result_gpu.get(result)
    return result
Exemplo n.º 23
0
 def test_fft_float64_to_complex128_2d(self):
     """2D float64 -> complex128 GPU FFT agrees with ``np.fft.rfftn``."""
     host_image = np.random.rand(self.N, self.M).astype(np.float64)
     expected = np.fft.rfftn(host_image)

     image_gpu = gpuarray.to_gpu(host_image)
     # Only M // 2 + 1 columns are produced
     spectrum_gpu = gpuarray.empty((self.N, self.M // 2 + 1), np.complex128)
     forward_plan = fft.Plan(host_image.shape, np.float64, np.complex128)
     fft.fft(image_gpu, spectrum_gpu, forward_plan)

     assert np.allclose(expected, spectrum_gpu.get(), atol=atol_float64)
Exemplo n.º 24
0
 def test_work_area(self):
     """A plan with a caller-supplied work area still gives a correct FFT."""
     host_signal = np.random.rand(self.N).astype(np.float32)
     expected = np.fft.rfftn(host_signal)

     signal_gpu = gpuarray.to_gpu(host_signal)
     spectrum_gpu = gpuarray.empty(self.N // 2 + 1, np.complex64)

     # Create the plan without automatic allocation, then hand it a byte
     # buffer of the size it reports.
     manual_plan = fft.Plan(host_signal.shape, np.float32, np.complex64,
                            auto_allocate=False)
     scratch = gpuarray.empty((manual_plan.worksize, ), np.uint8)
     manual_plan.set_work_area(scratch)

     fft.fft(signal_gpu, spectrum_gpu, manual_plan)
     assert np.allclose(expected, spectrum_gpu.get(), atol=atol_float32)
Exemplo n.º 25
0
    def test_batch_ifft_complex128_to_float64_1d(self):
        """Scaled batched 1D complex128 -> float64 inverse FFT recovers the input."""
        original = np.random.rand(self.B, self.N).astype(np.float64)
        spectrum = np.asarray(np.fft.rfft(original, axis=1), np.complex128)

        # The spectrum is made C-contiguous before it is uploaded.
        spectrum_gpu = gpuarray.to_gpu(np.ascontiguousarray(spectrum))
        restored_gpu = gpuarray.empty((self.B, self.N), np.float64)
        row_length = original.shape[1]
        batched_plan = fft.Plan(row_length, np.complex128, np.float64,
                                batch=self.B)
        fft.ifft(spectrum_gpu, restored_gpu, batched_plan, True)

        assert np.allclose(original, restored_gpu.get(), atol=atol_float64)
Exemplo n.º 26
0
    def test_ifft_complex64_to_float32_2d(self):
        """Scaled 2D complex64 -> float32 GPU inverse FFT recovers the input."""
        original = np.random.rand(self.N, self.M).astype(np.float32)
        spectrum = np.asarray(np.fft.rfftn(original), np.complex64)

        # The spectrum is made C-contiguous before it is uploaded.
        spectrum_gpu = gpuarray.to_gpu(np.ascontiguousarray(spectrum))
        restored_gpu = gpuarray.empty((self.N, self.M), np.float32)
        inverse_plan = fft.Plan(original.shape, np.complex64, np.float32)
        fft.ifft(spectrum_gpu, restored_gpu, inverse_plan, True)

        assert np.allclose(original, restored_gpu.get(), atol=atol_float32)
Exemplo n.º 27
0
    def allocate_grid(self, **kwargs):
        """Allocate the GPU grid buffer and its cuFFT plan.

        ``nf`` may be passed as a keyword to replace ``self.nf``; it must not
        be None afterwards. The grid length is ``int(self.sigma * self.nf)``.
        The plan is complex-to-complex and bound to ``self.stream``.

        Returns ``self`` so calls can be chained.
        """
        self.nf = kwargs.get('nf', self.nf)

        assert (self.nf is not None)

        grid_length = int(self.sigma * self.nf)
        self.n = grid_length
        self.ghat_g = gpuarray.zeros(self.n, dtype=self.complex_type)

        plan_options = dict(stream=self.stream)
        self.cu_plan = cufft.Plan(self.n, self.complex_type,
                                  self.complex_type, **plan_options)
        return self
Exemplo n.º 28
0
 def irfft(a, normalize=True, nthreads=0):
     """Inverse real FFT of ``a``, on the GPU when ``is_memory_enough(a)``.

     Otherwise the work is delegated to ``_irfft(a)``. The output's last
     axis has length ``(a.shape[-1] - 1) * 2``.

     NOTE(review): ``normalize`` and ``nthreads`` are not read on either
     path, and the GPU transform is called without a scale argument --
     confirm whether that is intended.
     """
     if not is_memory_enough(a):
         return _irfft(a)

     spectrum_gpu = gpuarray.to_gpu(a)

     # Real-space shape: only the last axis changes
     real_shape = list(a.shape)
     real_shape[-1] = (real_shape[-1]-1)*2

     real_type = G_CTYPES[a.dtype.type]
     signal_gpu = gpuarray.empty(real_shape, real_type)
     inverse_plan = fft.Plan(real_shape, a.dtype.type, real_type)
     fft.ifft(spectrum_gpu, signal_gpu, inverse_plan)
     return signal_gpu.get()
Exemplo n.º 29
0
 def rfft(a, nthreads=0):
     """Real FFT of ``a``, on the GPU when ``is_memory_enough(a)``.

     Otherwise the work is delegated to ``_rfft(a)``. The output's last axis
     has length ``a.shape[-1] // 2 + 1``. ``nthreads`` is not read here.

     The plan is built from the shape of the real input ``a``. It used to be
     built from the shortened output shape, which sets up a transform of the
     wrong length. A leftover debug ``print`` was removed as well.
     """
     if is_memory_enough(a):
         arg = gpuarray.to_gpu(a)
         # Output (half-spectrum) shape: only the last axis changes
         out_shape = [s for s in a.shape]
         out_shape[-1] = out_shape[-1]//2 + 1
         ctype = G_RTYPES[a.dtype.type]
         afg = gpuarray.empty(out_shape, ctype)
         # The transform size is the real-space shape, not the output shape
         plan = fft.Plan(a.shape, a.dtype.type, ctype)
         fft.fft(arg, afg, plan)
         return afg.get()
     else:
         return _rfft(a)
Exemplo n.º 30
0
    def __init__(self, Nx, xmax, **kwargs):
        """__init__(self, Nx, xmax, **kwargs) -> None
        initialize this class.

        Parameters
        ----------
        Nx     : int
            the number of grid points
        xmax   : float
            the maximum of space in the x direction.
            The spatial resolution is defined as 2*xmax/Nx.
        kwargs : options
            fft_type : str (default : 'cufft')
                'numpy' : use fft methods in the numpy module
                'fftw'  : use fft methods in the pyfftw module if supported
                'cufft' : use fft methods in the cufft module if supported
                An unavailable backend falls back in the order
                cufft -> fftw -> numpy.
            others : See the following classes or methods.
                space class
                InitPhoton()
                InitMSFT()
                InitAxes()
                InitDensity()

        Raises
        ------
        ValueError
            if `fft_type` is given and is not one of the names above.
        """
        space.__init__(self, Nx, xmax, **kwargs)
        self.InitPhoton(**kwargs)
        self.InitMSFT(**kwargs)
        self.InitAxes(**kwargs)
        self.InitDensity(**kwargs)

        # Resolve the requested backend
        requested = kwargs.get('fft_type')
        if requested is None:
            requested = 'cufft'
        elif requested not in ('numpy', 'fftw', 'cufft'):
            raise ValueError('Invalid value for the keyword "fft_type."')
        self.__fft_type = requested

        # cufft: allocate the GPU buffers and the plan, or fall back to fftw
        if self.__fft_type == 'cufft':
            if found_cufft is True:
                grid_shape = self.mesh_space()[0].shape
                self.__x_gpu = gpuarray.empty(grid_shape, np.complex64)
                self.__xf_gpu = gpuarray.empty(grid_shape, np.complex64)
                self.__plan = cu_fft.Plan(grid_shape, np.complex64,
                                          np.complex64)
            else:
                self.__fft_type = 'fftw'

        # fftw: fall back to numpy when pyfftw is missing
        if self.__fft_type == 'fftw' and found_pyfftw is False:
            self.__fft_type = 'numpy'