def shift_interp_field_gpu(self, field_array, n_move):
    """
    Shift the field 'field_array' by n_move cells (backwards) on the GPU,
    by applying a kernel that copies the shifted fields to a buffer array.

    Parameters
    ----------
    field_array: 2darray of complex values
        Contains the value of the fields
        (not modified in place; a shifted copy is returned)

    n_move: int
        The number of cells by which the grid should be shifted

    Returns
    -------
    The new, shifted field array
    """
    # Get a 2D CUDA grid of the size of the grid
    dim_grid_2d, dim_block_2d = cuda_tpb_bpg_2d(
        field_array.shape[0], field_array.shape[1])
    # Initialize a field buffer to temporarily store the data
    field_buffer = cuda.device_array(
        (field_array.shape[0], field_array.shape[1]), dtype=np.complex128)
    # Shift the field array and copy it to the buffer
    shift_field_array_gpu[dim_grid_2d, dim_block_2d](
        field_array, field_buffer, n_move)
    # Rebind the local name to the buffer, which now holds the shifted data
    field_array = field_buffer
    # Return the new shifted field array, so that the caller can
    # reassign it to the corresponding attribute
    return field_array
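# A minimal, CPU-only numpy sketch of what the GPU kernel
# `shift_field_array_gpu` is expected to do: copy the data `n_move` cells
# backwards along z (axis 0) into a zeroed buffer. This is illustrative
# only; the function and variable names below are placeholders.
def shift_interp_field_cpu_sketch(field_array, n_move):
    """Illustrative numpy equivalent of the GPU shift."""
    field_buffer = np.zeros_like(field_array)
    # Copy cells n_move ... Nz-1 to positions 0 ... Nz-1-n_move
    field_buffer[:field_array.shape[0] - n_move, :] = field_array[n_move:, :]
    return field_buffer

# Example usage (assumed shapes):
# A = np.arange(12, dtype=np.complex128).reshape(4, 3)
# A_shifted = shift_interp_field_cpu_sketch(A, 1)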
def erase(self, fieldtype):
    """
    Sets the field `fieldtype` to zero on the interpolation grid

    Parameters
    ----------
    fieldtype : string
        A string which represents the kind of field to be erased
        (either 'E', 'B', 'J', 'rho')
    """
    if self.use_cuda:
        # Obtain the cuda grid
        dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr)
        # Erase the arrays on the GPU
        if fieldtype == 'rho':
            cuda_erase_scalar[dim_grid, dim_block](
                self.interp[0].rho, self.interp[1].rho)
        elif fieldtype == 'J':
            cuda_erase_vector[dim_grid, dim_block](
                self.interp[0].Jr, self.interp[1].Jr,
                self.interp[0].Jt, self.interp[1].Jt,
                self.interp[0].Jz, self.interp[1].Jz)
        elif fieldtype == 'E':
            cuda_erase_vector[dim_grid, dim_block](
                self.interp[0].Er, self.interp[1].Er,
                self.interp[0].Et, self.interp[1].Et,
                self.interp[0].Ez, self.interp[1].Ez)
        elif fieldtype == 'B':
            cuda_erase_vector[dim_grid, dim_block](
                self.interp[0].Br, self.interp[1].Br,
                self.interp[0].Bt, self.interp[1].Bt,
                self.interp[0].Bz, self.interp[1].Bz)
        else:
            raise ValueError('Invalid string for fieldtype: %s' % fieldtype)
    else:
        # Erase the arrays on the CPU
        if fieldtype == 'rho':
            for m in range(self.Nm):
                self.interp[m].rho[:, :] = 0.
        elif fieldtype == 'J':
            for m in range(self.Nm):
                self.interp[m].Jr[:, :] = 0.
                self.interp[m].Jt[:, :] = 0.
                self.interp[m].Jz[:, :] = 0.
        elif fieldtype == 'E':
            for m in range(self.Nm):
                self.interp[m].Er[:, :] = 0.
                self.interp[m].Et[:, :] = 0.
                self.interp[m].Ez[:, :] = 0.
        elif fieldtype == 'B':
            for m in range(self.Nm):
                self.interp[m].Br[:, :] = 0.
                self.interp[m].Bt[:, :] = 0.
                self.interp[m].Bz[:, :] = 0.
        else:
            raise ValueError('Invalid string for fieldtype: %s' % fieldtype)
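# Hedged sketch of what a kernel such as `cuda_erase_scalar` could look
# like with numba.cuda. The kernel name and the two-mode signature are
# assumptions inferred from the call above, not the actual implementation.
from numba import cuda as numba_cuda

@numba_cuda.jit
def erase_scalar_kernel_sketch(mode0, mode1):
    # 2D thread index over the (Nz, Nr) grid
    iz, ir = numba_cuda.grid(2)
    if iz < mode0.shape[0] and ir < mode0.shape[1]:
        mode0[iz, ir] = 0.
        mode1[iz, ir] = 0.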
def filter(self, fieldtype):
    """
    Filter the field `fieldtype`

    Parameters
    ----------
    fieldtype : string
        A string which represents the kind of field to be filtered
        (either 'E', 'B', 'J', 'rho_next' or 'rho_prev')
    """
    if self.use_cuda:
        # Obtain the cuda grid
        dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr)
        # Filter fields on the GPU
        if fieldtype == 'rho_prev':
            cuda_filter_scalar[dim_grid, dim_block](
                self.rho_prev, self.d_filter_array, self.Nz, self.Nr)
        elif fieldtype == 'rho_next':
            cuda_filter_scalar[dim_grid, dim_block](
                self.rho_next, self.d_filter_array, self.Nz, self.Nr)
        elif fieldtype == 'J':
            cuda_filter_vector[dim_grid, dim_block](
                self.Jp, self.Jm, self.Jz,
                self.d_filter_array, self.Nz, self.Nr)
        elif fieldtype == 'E':
            cuda_filter_vector[dim_grid, dim_block](
                self.Ep, self.Em, self.Ez,
                self.d_filter_array, self.Nz, self.Nr)
        elif fieldtype == 'B':
            cuda_filter_vector[dim_grid, dim_block](
                self.Bp, self.Bm, self.Bz,
                self.d_filter_array, self.Nz, self.Nr)
        else:
            raise ValueError('Invalid string for fieldtype: %s' % fieldtype)
    else:
        # Filter fields on the CPU
        if fieldtype == 'rho_prev':
            self.rho_prev = self.rho_prev * self.filter_array
        elif fieldtype == 'rho_next':
            self.rho_next = self.rho_next * self.filter_array
        elif fieldtype == 'J':
            self.Jp = self.Jp * self.filter_array
            self.Jm = self.Jm * self.filter_array
            self.Jz = self.Jz * self.filter_array
        elif fieldtype == 'E':
            self.Ep = self.Ep * self.filter_array
            self.Em = self.Em * self.filter_array
            self.Ez = self.Ez * self.filter_array
        elif fieldtype == 'B':
            self.Bp = self.Bp * self.filter_array
            self.Bm = self.Bm * self.filter_array
            self.Bz = self.Bz * self.filter_array
        else:
            raise ValueError('Invalid string for fieldtype: %s' % fieldtype)
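# A minimal numpy sketch of what the CPU branch above does: multiply each
# spectral field elementwise by a precomputed filter array. The particular
# form used here to build the array, (1 + cos(kz*dz))/2 (the transfer
# function of a 1-2-1 binomial smoother), is only an example; the actual
# `filter_array` of the code may be defined differently and may be 2D.
def apply_spectral_filter_sketch(field, kz, dz):
    """Illustrative: damp high-kz components of a (Nz, Nr) spectral field."""
    # Equal to 1 at kz = 0 and 0 at the Nyquist frequency kz = pi/dz
    filter_array = 0.5 * (1. + np.cos(kz * dz))
    # Here the filter depends only on kz, so it is broadcast along r
    return field * filter_array[:, np.newaxis]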
def push_rho(self):
    """
    Transfer the values of rho_next to rho_prev,
    and set rho_next to zero
    """
    if self.use_cuda:
        # Obtain the cuda grid
        dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr)
        # Push the fields on the GPU
        cuda_push_rho[dim_grid, dim_block](
            self.rho_prev, self.rho_next, self.Nz, self.Nr)
    else:
        # Push the fields on the CPU
        self.rho_prev[:, :] = self.rho_next[:, :]
        self.rho_next[:, :] = 0.
def divide_by_volume(self, fieldtype):
    """
    Divide the field `fieldtype` in each cell by the cell volume,
    on the interpolation grid.

    This is typically done for rho and J, after the charge and
    current deposition.

    Parameters
    ----------
    fieldtype : string
        A string which represents the kind of field to be divided by
        the cell volume (either 'rho' or 'J')
    """
    if self.use_cuda:
        # Perform division on the GPU
        dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr)
        if fieldtype == 'rho':
            cuda_divide_scalar_by_volume[dim_grid, dim_block](
                self.interp[0].rho, self.interp[1].rho,
                self.interp[0].d_invvol, self.interp[1].d_invvol)
        elif fieldtype == 'J':
            cuda_divide_vector_by_volume[dim_grid, dim_block](
                self.interp[0].Jr, self.interp[1].Jr,
                self.interp[0].Jt, self.interp[1].Jt,
                self.interp[0].Jz, self.interp[1].Jz,
                self.interp[0].d_invvol, self.interp[1].d_invvol)
        else:
            raise ValueError('Invalid string for fieldtype: %s' % fieldtype)
    else:
        # Perform division on the CPU
        if fieldtype == 'rho':
            for m in range(self.Nm):
                self.interp[m].rho = \
                    self.interp[m].rho * self.interp[m].invvol[np.newaxis, :]
        elif fieldtype == 'J':
            for m in range(self.Nm):
                self.interp[m].Jr = \
                    self.interp[m].Jr * self.interp[m].invvol[np.newaxis, :]
                self.interp[m].Jt = \
                    self.interp[m].Jt * self.interp[m].invvol[np.newaxis, :]
                self.interp[m].Jz = \
                    self.interp[m].Jz * self.interp[m].invvol[np.newaxis, :]
        else:
            raise ValueError('Invalid string for fieldtype: %s' % fieldtype)
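# Hedged sketch of how the inverse cell volumes (`invvol`) used above can
# be computed for ring-shaped cells in cylindrical geometry. The radial
# grid convention (cell centers at (i + 0.5)*dr) is an assumption; the
# code's actual grid offsets and axis treatment may differ.
def inverse_cell_volume_sketch(Nr, dr, dz):
    """Volume of ring-shaped cells: pi*dz*((r+dr/2)^2 - (r-dr/2)^2)."""
    r = (np.arange(Nr) + 0.5) * dr
    vol = np.pi * dz * ((r + 0.5 * dr)**2 - (r - 0.5 * dr)**2)
    return 1. / vol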
def correct_currents(self, dt, ps):
    """
    Correct the currents so that they satisfy the
    charge conservation equation

    Parameters
    ----------
    dt : float
        Timestep of the simulation

    ps : PsatdCoeffs object
        Contains the precalculated coefficients (standard or
        Galilean/comoving) used for the correction
    """
    # Precalculate useful coefficient
    inv_dt = 1. / dt

    if self.use_cuda:
        # Obtain the cuda grid
        dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr)
        # Correct the currents on the GPU
        if ps.V is None:
            # With standard PSATD algorithm
            cuda_correct_currents_standard[dim_grid, dim_block](
                self.rho_prev, self.rho_next, self.Jp, self.Jm, self.Jz,
                self.d_kz, self.d_kr, self.d_inv_k2,
                inv_dt, self.Nz, self.Nr)
        else:
            # With Galilean/comoving algorithm
            cuda_correct_currents_comoving[dim_grid, dim_block](
                self.rho_prev, self.rho_next, self.Jp, self.Jm, self.Jz,
                self.d_kz, self.d_kr, self.d_inv_k2,
                ps.d_j_corr_coef, ps.d_T_eb, ps.d_T_cc,
                inv_dt, self.Nz, self.Nr)
    else:
        # Correct the currents on the CPU
        if ps.V is None:
            # With standard PSATD algorithm
            numba_correct_currents_standard(
                self.rho_prev, self.rho_next, self.Jp, self.Jm, self.Jz,
                self.kz, self.kr, self.inv_k2,
                inv_dt, self.Nz, self.Nr)
        else:
            # With Galilean/comoving algorithm
            numba_correct_currents_comoving(
                self.rho_prev, self.rho_next, self.Jp, self.Jm, self.Jz,
                self.kz, self.kr, self.inv_k2,
                ps.j_corr_coef, ps.T_eb, ps.T_cc,
                inv_dt, self.Nz, self.Nr)
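# Hedged numpy sketch of the standard spectral current correction that a
# routine like `numba_correct_currents_standard` performs: the part of J
# that violates the discrete continuity equation is projected out. For
# simplicity this sketch uses Cartesian components and a common sign
# convention; the code's cylindrical (Jp/Jm/Jz) version differs in detail.
def correct_currents_sketch(rho_prev, rho_next, Jx, Jy, Jz,
                            kx, ky, kz, inv_dt):
    """Enforce i k.J + (rho_next - rho_prev)/dt = 0 (illustrative)."""
    k2 = kx**2 + ky**2 + kz**2
    # Avoid division by zero at k = 0
    inv_k2 = np.where(k2 != 0., 1. / np.where(k2 != 0., k2, 1.), 0.)
    # Error in the continuity equation
    F = (1.j * (kx * Jx + ky * Jy + kz * Jz)
         + (rho_next - rho_prev) * inv_dt) * inv_k2
    # Subtract the longitudinal error from J
    Jx = Jx + 1.j * kx * F
    Jy = Jy + 1.j * ky * F
    Jz = Jz + 1.j * kz * F
    return Jx, Jy, Jz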
def damp_guard_EB(self, interp):
    """
    Damp the fields E and B in the guard cells.

    Parameters
    ----------
    interp: list of InterpolationGrid objects (one per azimuthal mode)
        Objects that contain the fields to be damped.
    """
    # Damp the fields on the CPU or the GPU
    if interp[0].use_cuda:
        # Damp the fields on the GPU
        dim_grid, dim_block = cuda_tpb_bpg_2d(self.n_guard, interp[0].Nr)
        cuda_damp_EB[dim_grid, dim_block](
            interp[0].Er, interp[0].Et, interp[0].Ez,
            interp[0].Br, interp[0].Bt, interp[0].Bz,
            interp[1].Er, interp[1].Et, interp[1].Ez,
            interp[1].Br, interp[1].Bt, interp[1].Bz,
            self.d_left_damp, self.d_right_damp, self.n_guard)
    else:
        # Damp the fields on the CPU
        n_guard = self.n_guard
        for m in range(len(interp)):
            # Damp the fields in left guard cells
            interp[m].Er[:n_guard, :] *= self.left_damp[:, np.newaxis]
            interp[m].Et[:n_guard, :] *= self.left_damp[:, np.newaxis]
            interp[m].Ez[:n_guard, :] *= self.left_damp[:, np.newaxis]
            interp[m].Br[:n_guard, :] *= self.left_damp[:, np.newaxis]
            interp[m].Bt[:n_guard, :] *= self.left_damp[:, np.newaxis]
            interp[m].Bz[:n_guard, :] *= self.left_damp[:, np.newaxis]
            # Damp the fields in right guard cells
            interp[m].Er[-n_guard:, :] *= self.right_damp[::-1, np.newaxis]
            interp[m].Et[-n_guard:, :] *= self.right_damp[::-1, np.newaxis]
            interp[m].Ez[-n_guard:, :] *= self.right_damp[::-1, np.newaxis]
            interp[m].Br[-n_guard:, :] *= self.right_damp[::-1, np.newaxis]
            interp[m].Bt[-n_guard:, :] *= self.right_damp[::-1, np.newaxis]
            interp[m].Bz[-n_guard:, :] *= self.right_damp[::-1, np.newaxis]
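# Hedged sketch of how a damping profile like `left_damp`/`right_damp`
# might be generated. A smooth sin^2 ramp over the guard cells is a common
# choice, but it is only an assumption here; the code's actual profile may
# be different.
def guard_damp_array_sketch(n_guard):
    """Array going smoothly from ~0 (outermost guard cell) towards 1."""
    i = np.arange(n_guard)
    return np.sin(0.5 * np.pi * i / n_guard)**2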
def __init__(self, Nr, Nz, use_cuda=False, nthreads=4):
    """
    Initialize an FFT object

    Parameters
    ----------
    Nr: int
        Number of grid points along the r axis (axis -1)

    Nz: int
        Number of grid points along the z axis (axis 0)

    use_cuda: bool, optional
        Whether to perform the Fourier transform on the GPU

    nthreads : int, optional
        Number of threads for the FFTW transform
    """
    # Check whether to use cuda
    self.use_cuda = use_cuda
    if (self.use_cuda is True) and (cuda_installed is False):
        self.use_cuda = False
        print('** Cuda not available for Fourier transform.')
        print('** Performing the Fourier transform on the CPU.')

    # Initialize the object for calculation on the GPU
    if self.use_cuda:
        # Initialize the dimension of the grid and blocks
        self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr)

        # Initialize 1d buffer for cufft
        self.buffer1d_in = cuda.device_array(
            (Nz * Nr,), dtype=np.complex128)
        self.buffer1d_out = cuda.device_array(
            (Nz * Nr,), dtype=np.complex128)
        # Initialize the cuda libraries object
        self.fft = cufft.FFTPlan(shape=(Nz,), itype=np.complex128,
                                 otype=np.complex128, batch=Nr)
        self.blas = cublas.Blas()   # For normalization of the iFFT
        self.inv_Nz = 1. / Nz       # For normalization of the iFFT

        # Initialize the spectral buffers
        self.spect_buffer_r = cuda.device_array(
            (Nz, Nr), dtype=np.complex128)
        self.spect_buffer_t = cuda.device_array(
            (Nz, Nr), dtype=np.complex128)

    # Initialize the object for calculation on the CPU
    else:
        # First buffer and FFTW transform
        self.interp_buffer_r = \
            pyfftw.n_byte_align_empty((Nz, Nr), 16, 'complex128')
        self.spect_buffer_r = \
            pyfftw.n_byte_align_empty((Nz, Nr), 16, 'complex128')
        self.fft_r = pyfftw.FFTW(self.interp_buffer_r, self.spect_buffer_r,
                                 axes=(0,), direction='FFTW_FORWARD',
                                 threads=nthreads)
        self.ifft_r = pyfftw.FFTW(self.spect_buffer_r, self.interp_buffer_r,
                                  axes=(0,), direction='FFTW_BACKWARD',
                                  threads=nthreads)

        # Second buffer and FFTW transform
        self.interp_buffer_t = \
            pyfftw.n_byte_align_empty((Nz, Nr), 16, 'complex128')
        self.spect_buffer_t = \
            pyfftw.n_byte_align_empty((Nz, Nr), 16, 'complex128')
        self.fft_t = pyfftw.FFTW(self.interp_buffer_t, self.spect_buffer_t,
                                 axes=(0,), direction='FFTW_FORWARD',
                                 threads=nthreads)
        self.ifft_t = pyfftw.FFTW(self.spect_buffer_t, self.interp_buffer_t,
                                  axes=(0,), direction='FFTW_BACKWARD',
                                  threads=nthreads)
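# For reference, a minimal numpy sketch of the transform that the plans
# above perform: a 1D FFT along z (axis 0) for every radial column. Note
# that np.fft.ifft already includes the 1/Nz factor, whereas cuFFT leaves
# the inverse transform unnormalized, which is why the GPU path above
# keeps an explicit `inv_Nz` factor (applied with cuBLAS).
def fft_z_sketch(field):
    """Forward transform along z (axis 0), for every radial column."""
    return np.fft.fft(field, axis=0)

def ifft_z_sketch(spect):
    """Inverse transform along z; includes the 1/Nz normalization."""
    return np.fft.ifft(spect, axis=0)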
def deposit(self, fld, fieldtype):
    """
    Deposit the particles charge or current onto the grid

    This assumes that the particle positions (and momenta in the case of J)
    are currently at the same timestep as the field that is to be deposited

    Parameters
    ----------
    fld : a Field object
        Contains the list of InterpolationGrid objects with
        the field values as well as the prefix sum.

    fieldtype : string
        Indicates which field to deposit
        Either 'J' or 'rho'
    """
    # Shortcut for the list of InterpolationGrid objects
    grid = fld.interp

    if self.use_cuda:
        # Get the threads per block and the blocks per grid
        dim_grid_2d_flat, dim_block_2d_flat = \
            cuda_tpb_bpg_1d(grid[0].Nz * grid[0].Nr)
        dim_grid_2d, dim_block_2d = \
            cuda_tpb_bpg_2d(grid[0].Nz, grid[0].Nr)

        # Create the helper arrays for deposition
        if self.particle_shape == 'linear_non_atomic':
            d_F0, d_F1, d_F2, d_F3 = cuda_deposition_arrays(
                grid[0].Nz, grid[0].Nr, fieldtype=fieldtype)

        # Sort the particles
        if self.sorted is False:
            self.sort_particles(fld=fld)
            # The particles are now sorted and rearranged
            self.sorted = True

        # Call the CUDA Kernel for the deposition of rho or J,
        # for Mode 0 and 1 only.
        # Rho
        if fieldtype == 'rho':
            # Deposit rho in each of four directions
            if self.particle_shape == 'linear_non_atomic':
                deposit_rho_gpu[dim_grid_2d_flat, dim_block_2d_flat](
                    self.x, self.y, self.z, self.w,
                    grid[0].invdz, grid[0].zmin, grid[0].Nz,
                    grid[0].invdr, grid[0].rmin, grid[0].Nr,
                    d_F0, d_F1, d_F2, d_F3,
                    self.cell_idx, self.prefix_sum)
                # Add the four directions together
                add_rho[dim_grid_2d, dim_block_2d](
                    grid[0].rho, grid[1].rho,
                    d_F0, d_F1, d_F2, d_F3)
            elif self.particle_shape == 'cubic':
                deposit_rho_gpu_cubic[dim_grid_2d_flat, dim_block_2d_flat](
                    self.x, self.y, self.z, self.w,
                    grid[0].invdz, grid[0].zmin, grid[0].Nz,
                    grid[0].invdr, grid[0].rmin, grid[0].Nr,
                    grid[0].rho, grid[1].rho,
                    self.cell_idx, self.prefix_sum)
            elif self.particle_shape == 'linear':
                deposit_rho_gpu_linear[dim_grid_2d_flat, dim_block_2d_flat](
                    self.x, self.y, self.z, self.w,
                    grid[0].invdz, grid[0].zmin, grid[0].Nz,
                    grid[0].invdr, grid[0].rmin, grid[0].Nr,
                    grid[0].rho, grid[1].rho,
                    self.cell_idx, self.prefix_sum)
            else:
                raise ValueError("`particle_shape` should be either "
                                 "'linear', 'linear_non_atomic' or 'cubic' "
                                 "but is `%s`" % self.particle_shape)
        # J
        elif fieldtype == 'J':
            # Deposit J in each of four directions
            if self.particle_shape == 'linear_non_atomic':
                deposit_J_gpu[dim_grid_2d_flat, dim_block_2d_flat](
                    self.x, self.y, self.z, self.w,
                    self.ux, self.uy, self.uz, self.inv_gamma,
                    grid[0].invdz, grid[0].zmin, grid[0].Nz,
                    grid[0].invdr, grid[0].rmin, grid[0].Nr,
                    d_F0, d_F1, d_F2, d_F3,
                    self.cell_idx, self.prefix_sum)
                # Add the four directions together
                add_J[dim_grid_2d, dim_block_2d](
                    grid[0].Jr, grid[1].Jr,
                    grid[0].Jt, grid[1].Jt,
                    grid[0].Jz, grid[1].Jz,
                    d_F0, d_F1, d_F2, d_F3)
            elif self.particle_shape == 'cubic':
                deposit_J_gpu_cubic[dim_grid_2d_flat, dim_block_2d_flat](
                    self.x, self.y, self.z, self.w,
                    self.ux, self.uy, self.uz, self.inv_gamma,
                    grid[0].invdz, grid[0].zmin, grid[0].Nz,
                    grid[0].invdr, grid[0].rmin, grid[0].Nr,
                    grid[0].Jr, grid[1].Jr,
                    grid[0].Jt, grid[1].Jt,
                    grid[0].Jz, grid[1].Jz,
                    self.cell_idx, self.prefix_sum)
            elif self.particle_shape == 'linear':
                deposit_J_gpu_linear[dim_grid_2d_flat, dim_block_2d_flat](
                    self.x, self.y, self.z, self.w,
                    self.ux, self.uy, self.uz, self.inv_gamma,
                    grid[0].invdz, grid[0].zmin, grid[0].Nz,
                    grid[0].invdr, grid[0].rmin, grid[0].Nr,
                    grid[0].Jr, grid[1].Jr,
                    grid[0].Jt, grid[1].Jt,
                    grid[0].Jz, grid[1].Jz,
                    self.cell_idx, self.prefix_sum)
            else:
                raise ValueError("`particle_shape` should be either "
                                 "'linear', 'linear_non_atomic' or 'cubic' "
                                 "but is `%s`" % self.particle_shape)
        else:
            raise ValueError("`fieldtype` should be either 'J' or "
                             "'rho', but is `%s`" % fieldtype)

    # CPU version
    else:
        # Preliminary arrays for the cylindrical conversion
        r = np.sqrt(self.x**2 + self.y**2)
        # Avoid division by 0.
        invr = 1. / np.where(r != 0., r, 1.)
        cos = np.where(r != 0., self.x * invr, 1.)
        sin = np.where(r != 0., self.y * invr, 0.)

        # Indices and weights
        if self.particle_shape == 'cubic':
            shape_order = 3
        else:
            shape_order = 1
        iz, Sz = weights(self.z, grid[0].invdz, grid[0].zmin, grid[0].Nz,
                         direction='z', shape_order=shape_order)
        ir, Sr = weights(r, grid[0].invdr, grid[0].rmin, grid[0].Nr,
                         direction='r', shape_order=shape_order)

        # Number of modes considered:
        # number of elements in the grid list
        Nm = len(grid)

        if fieldtype == 'rho':
            # ---------------------------------------
            # Deposit the charge density mode by mode
            # ---------------------------------------
            # Prepare auxiliary matrix
            exptheta = np.ones(self.Ntot, dtype='complex')
            # exptheta takes the value exp(im theta) throughout the loop
            for m in range(Nm):
                # Increment exptheta (notice the + : forward transform)
                if m == 1:
                    exptheta[:].real = cos
                    exptheta[:].imag = sin
                elif m > 1:
                    exptheta[:] = exptheta * (cos + 1.j * sin)
                # Deposit the fields
                # (The sign -1 with which the guards are added is not
                # trivial to derive but avoids artifacts on the axis)
                deposit_field_numba(self.w * exptheta, grid[m].rho,
                                    iz, ir, Sz, Sr, -1.)

        elif fieldtype == 'J':
            # ----------------------------------------
            # Deposit the current density mode by mode
            # ----------------------------------------
            # Calculate the currents
            Jr = self.w * c * self.inv_gamma * (cos * self.ux + sin * self.uy)
            Jt = self.w * c * self.inv_gamma * (cos * self.uy - sin * self.ux)
            Jz = self.w * c * self.inv_gamma * self.uz

            # Prepare auxiliary matrix
            exptheta = np.ones(self.Ntot, dtype='complex')
            # exptheta takes the value exp(im theta) throughout the loop
            for m in range(Nm):
                # Increment exptheta (notice the + : forward transform)
                if m == 1:
                    exptheta[:].real = cos
                    exptheta[:].imag = sin
                elif m > 1:
                    exptheta[:] = exptheta * (cos + 1.j * sin)
                # Deposit the fields
                # (The sign -1 with which the guards are added is not
                # trivial to derive but avoids artifacts on the axis)
                deposit_field_numba(Jr * exptheta, grid[m].Jr,
                                    iz, ir, Sz, Sr, -1.)
                deposit_field_numba(Jt * exptheta, grid[m].Jt,
                                    iz, ir, Sz, Sr, -1.)
                deposit_field_numba(Jz * exptheta, grid[m].Jz,
                                    iz, ir, Sz, Sr, -1.)
        else:
            raise ValueError("`fieldtype` should be either 'J' or 'rho', "
                             "but is `%s`" % fieldtype)
def __init__(self, p, Nr, Nz, rmax, method, use_cuda=False, **kw):
    """
    Calculate the r (position) and nu (frequency) grid
    on which the transform will operate.

    Also store auxiliary data needed for the transform.

    Parameters
    ----------
    p : int
        Order of the Hankel transform

    Nr, Nz : int
        Number of points in the r direction and z direction

    rmax : float
        Edge of the box in which the Hankel transform is taken
        (The function is assumed to be zero at that point.)

    method : string
        The method used to calculate the Hankel transform

    use_cuda : bool, optional
        Whether to use the GPU for the Hankel transform
        (Only available for the MDHT method)

    kw : optional keyword arguments to be passed in the case of the MDHT
        (e.g. `tpb`, the number of threads per block, when cuda is used)
    """
    # Check that the method is valid
    if method not in available_methods:
        raise ValueError('Illegal method string')
    else:
        self.method = method

    # Register whether to use the GPU.
    # If yes, initialize the corresponding cuda objects
    self.use_cuda = use_cuda
    if (self.use_cuda is True) and (cuda_installed is False):
        self.use_cuda = False
        print('** Cuda not available for Hankel transform.')
        print('** Performing the Hankel transform on the CPU.')
    if self.use_cuda:
        # Initialize a cuda BLAS object (required to perform
        # the matrix product on the GPU)
        self.blas = cublas.Blas()
        # Initialize two buffer arrays on the GPU
        # The cuBlas API requires that these arrays be in Fortran order
        zero_array = np.zeros((Nz, Nr), dtype=np.complex128, order='F')
        self.d_in = cuda.to_device(zero_array)
        self.d_out = cuda.to_device(zero_array)
        # Initialize the threads per block and blocks per grid
        self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr)

    # Call the corresponding initialization routine
    if self.method == 'FHT':
        self.FHT_init(p, Nr, rmax)
    elif self.method == 'QDHT':
        self.QDHT_init(p, Nr, rmax)
    elif self.method == 'MDHT(m,m)':
        self.MDHT_init(p, Nr, rmax, m=p, **kw)
    elif self.method == 'MDHT(m-1,m)':
        self.MDHT_init(p, Nr, rmax, m=p + 1, **kw)
    elif self.method == 'MDHT(m+1,m)':
        self.MDHT_init(p, Nr, rmax, m=p - 1, **kw)
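# The MDHT method is essentially a dense matrix product applied along r
# for every z slice, which is why the GPU path above allocates
# Fortran-ordered buffers for cuBLAS. Below is a hedged numpy sketch of
# the application step only (not of how the transform matrix M is built).
def apply_dht_sketch(field, M):
    """
    Apply a discrete Hankel transform matrix M (shape (Nnu, Nr)) to the
    radial axis of `field` (shape (Nz, Nr)), one z slice at a time.
    """
    # Equivalent to: for iz in range(Nz): out[iz, :] = M.dot(field[iz, :])
    return np.dot(field, M.T)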
def __init__(self, Nz, Nr, m, rmax, use_cuda=False):
    """
    Initializes the dht and fft attributes, which contain auxiliary
    matrices allowing to transform the fields quickly

    Parameters
    ----------
    Nz, Nr : int
        Number of points along z and r respectively

    m : int
        Index of the mode (needed for the Hankel transform)

    rmax : float
        The size of the simulation box along r.

    use_cuda : bool, optional
        Whether to perform the transforms on the GPU
    """
    # Check whether to use the GPU
    self.use_cuda = use_cuda
    if (self.use_cuda is True) and (cuda_installed is False):
        self.use_cuda = False
    if self.use_cuda:
        # Initialize the dimension of the grid and blocks
        self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr)

    # Initialize the DHT (local implementation, see hankel.py)
    self.dht0 = DHT(m, Nr, Nz, rmax, 'MDHT(m,m)', d=0.5,
                    Fw='inverse', use_cuda=self.use_cuda)
    self.dhtp = DHT(m + 1, Nr, Nz, rmax, 'MDHT(m+1,m)', d=0.5,
                    Fw='inverse', use_cuda=self.use_cuda)
    self.dhtm = DHT(m - 1, Nr, Nz, rmax, 'MDHT(m-1,m)', d=0.5,
                    Fw='inverse', use_cuda=self.use_cuda)

    # Initialize the FFT
    self.fft = FFT(Nr, Nz, use_cuda=self.use_cuda)

    # Extract the spectral buffers
    # - In the case where the GPU is used, these buffers are cuda
    #   device arrays.
    # - In the case where the CPU is used, these buffers are tied to
    #   the FFTW plan object (see the __init__ of the FFT object). Do
    #   *not* modify these buffers to make them point to another array.
    self.spect_buffer_r, self.spect_buffer_t = self.fft.get_buffers()

    # Different names for same object (for economy of memory)
    self.spect_buffer_p = self.spect_buffer_r
    self.spect_buffer_m = self.spect_buffer_t
def push_eb_with(self, ps, use_true_rho=False):
    """
    Push the fields over one timestep, using the psatd coefficients.

    Parameters
    ----------
    ps : PsatdCoeffs object
        psatd object corresponding to the same m mode

    use_true_rho : bool, optional
        Whether to use the rho projected on the grid.
        If set to False, this will use div(E) and div(J)
        to evaluate rho and its time evolution.
        In the case use_true_rho==False, the rho projected
        on the grid is used only to correct the currents, and
        the simulation can be run without the neutralizing ions.
    """
    # Check that psatd object passed as argument is the right one
    # (i.e. corresponds to the right mode)
    assert(self.m == ps.m)

    if self.use_cuda:
        # Obtain the cuda grid
        dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr)
        # Push the fields on the GPU
        if ps.V is None:
            # With the standard PSATD algorithm
            cuda_push_eb_standard[dim_grid, dim_block](
                self.Ep, self.Em, self.Ez, self.Bp, self.Bm, self.Bz,
                self.Jp, self.Jm, self.Jz, self.rho_prev, self.rho_next,
                ps.d_rho_prev_coef, ps.d_rho_next_coef, ps.d_j_coef,
                ps.d_C, ps.d_S_w, self.d_kr, self.d_kz, ps.dt,
                use_true_rho, self.Nz, self.Nr)
        else:
            # With the Galilean/comoving algorithm
            cuda_push_eb_comoving[dim_grid, dim_block](
                self.Ep, self.Em, self.Ez, self.Bp, self.Bm, self.Bz,
                self.Jp, self.Jm, self.Jz, self.rho_prev, self.rho_next,
                ps.d_rho_prev_coef, ps.d_rho_next_coef, ps.d_j_coef,
                ps.d_C, ps.d_S_w, ps.d_T_eb, ps.d_T_cc, ps.d_T_rho,
                self.d_kr, self.d_kz, ps.dt, ps.V,
                use_true_rho, self.Nz, self.Nr)
    else:
        # Push the fields on the CPU
        if ps.V is None:
            # With the standard PSATD algorithm
            numba_push_eb_standard(
                self.Ep, self.Em, self.Ez, self.Bp, self.Bm, self.Bz,
                self.Jp, self.Jm, self.Jz, self.rho_prev, self.rho_next,
                ps.rho_prev_coef, ps.rho_next_coef, ps.j_coef,
                ps.C, ps.S_w, self.kr, self.kz, ps.dt,
                use_true_rho, self.Nz, self.Nr)
        else:
            # With the Galilean/comoving algorithm
            numba_push_eb_comoving(
                self.Ep, self.Em, self.Ez, self.Bp, self.Bm, self.Bz,
                self.Jp, self.Jm, self.Jz, self.rho_prev, self.rho_next,
                ps.rho_prev_coef, ps.rho_next_coef, ps.j_coef,
                ps.C, ps.S_w, ps.T_eb, ps.T_cc, ps.T_rho,
                self.kr, self.kz, ps.dt, ps.V,
                use_true_rho, self.Nz, self.Nr)
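# For reference, a hedged sketch of how the standard PSATD coefficients
# `C` and `S_w` that enter the push above are usually defined:
# C = cos(w*dt) and S_w = sin(w*dt)/w with w = c*|k|. The PsatdCoeffs
# object of the code contains additional coefficients (e.g. for the
# Galilean/comoving variant), so this is illustrative only.
from scipy.constants import c as c_light

def psatd_coefficients_sketch(kz, kr, dt):
    """Return C = cos(w*dt) and S_w = sin(w*dt)/w, with w = c*sqrt(kz^2+kr^2)."""
    w = c_light * np.sqrt(kz**2 + kr**2)
    C = np.cos(w * dt)
    # Avoid division by zero at k = 0, where sin(w*dt)/w -> dt
    S_w = np.where(w != 0., np.sin(w * dt) / np.where(w != 0., w, 1.), dt)
    return C, S_w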
def copy_EB_buffers(self, interp,
                    before_sending=False, after_receiving=False):
    """
    Either copy the inner part of the domain to the sending buffer
    for E & B, or copy the receiving buffer for E & B
    to the guard cells of the domain.

    Depending on whether the field data is initially on the CPU
    or on the GPU, this function will do the appropriate exchange
    with the device.

    Parameters
    ----------
    interp: a list of InterpolationGrid objects
        (one element per azimuthal mode)

    before_sending: bool
        Whether to copy the inner part of the domain to the sending buffer

    after_receiving: bool
        Whether to copy the receiving buffer to the guard cells
    """
    # Shortcut for the guard cells
    ng = self.n_guard
    copy_left = (self.left_proc is not None)
    copy_right = (self.right_proc is not None)

    # When using the GPU
    if interp[0].use_cuda:
        # Calculate the number of blocks and threads per block
        dim_grid_2d, dim_block_2d = cuda_tpb_bpg_2d(ng, interp[0].Nr)

        if before_sending:
            # Copy the inner regions of the domain to the GPU buffers
            copy_EB_to_gpu_buffers[dim_grid_2d, dim_block_2d](
                self.d_EB_l, self.d_EB_r,
                interp[0].Er, interp[0].Et, interp[0].Ez,
                interp[0].Br, interp[0].Bt, interp[0].Bz,
                interp[1].Er, interp[1].Et, interp[1].Ez,
                interp[1].Br, interp[1].Bt, interp[1].Bz,
                copy_left, copy_right, ng)
            # Copy the GPU buffers to the sending CPU buffers
            if copy_left:
                self.d_EB_l.copy_to_host(self.EB_send_l)
            if copy_right:
                self.d_EB_r.copy_to_host(self.EB_send_r)

        elif after_receiving:
            # Copy the CPU receiving buffers to the GPU buffers
            if copy_left:
                self.d_EB_l.copy_to_device(self.EB_recv_l)
            if copy_right:
                self.d_EB_r.copy_to_device(self.EB_recv_r)
            # Copy the GPU buffers to the guard cells of the domain
            copy_EB_from_gpu_buffers[dim_grid_2d, dim_block_2d](
                self.d_EB_l, self.d_EB_r,
                interp[0].Er, interp[0].Et, interp[0].Ez,
                interp[0].Br, interp[0].Bt, interp[0].Bz,
                interp[1].Er, interp[1].Et, interp[1].Ez,
                interp[1].Br, interp[1].Bt, interp[1].Bz,
                copy_left, copy_right, ng)

    # Without GPU
    else:
        for m in range(self.Nm):
            offset = 6 * m
            if before_sending:
                # Copy the inner regions of the domain to the buffer
                if copy_left:
                    self.EB_send_l[0 + offset, :, :] = interp[m].Er[ng:2 * ng, :]
                    self.EB_send_l[1 + offset, :, :] = interp[m].Et[ng:2 * ng, :]
                    self.EB_send_l[2 + offset, :, :] = interp[m].Ez[ng:2 * ng, :]
                    self.EB_send_l[3 + offset, :, :] = interp[m].Br[ng:2 * ng, :]
                    self.EB_send_l[4 + offset, :, :] = interp[m].Bt[ng:2 * ng, :]
                    self.EB_send_l[5 + offset, :, :] = interp[m].Bz[ng:2 * ng, :]
                if copy_right:
                    self.EB_send_r[0 + offset, :, :] = interp[m].Er[-2 * ng:-ng, :]
                    self.EB_send_r[1 + offset, :, :] = interp[m].Et[-2 * ng:-ng, :]
                    self.EB_send_r[2 + offset, :, :] = interp[m].Ez[-2 * ng:-ng, :]
                    self.EB_send_r[3 + offset, :, :] = interp[m].Br[-2 * ng:-ng, :]
                    self.EB_send_r[4 + offset, :, :] = interp[m].Bt[-2 * ng:-ng, :]
                    self.EB_send_r[5 + offset, :, :] = interp[m].Bz[-2 * ng:-ng, :]
            elif after_receiving:
                # Copy the buffer to the guard cells of the domain
                if copy_left:
                    interp[m].Er[:ng, :] = self.EB_recv_l[0 + offset, :, :]
                    interp[m].Et[:ng, :] = self.EB_recv_l[1 + offset, :, :]
                    interp[m].Ez[:ng, :] = self.EB_recv_l[2 + offset, :, :]
                    interp[m].Br[:ng, :] = self.EB_recv_l[3 + offset, :, :]
                    interp[m].Bt[:ng, :] = self.EB_recv_l[4 + offset, :, :]
                    interp[m].Bz[:ng, :] = self.EB_recv_l[5 + offset, :, :]
                if copy_right:
                    interp[m].Er[-ng:, :] = self.EB_recv_r[0 + offset, :, :]
                    interp[m].Et[-ng:, :] = self.EB_recv_r[1 + offset, :, :]
                    interp[m].Ez[-ng:, :] = self.EB_recv_r[2 + offset, :, :]
                    interp[m].Br[-ng:, :] = self.EB_recv_r[3 + offset, :, :]
                    interp[m].Bt[-ng:, :] = self.EB_recv_r[4 + offset, :, :]
                    interp[m].Bz[-ng:, :] = self.EB_recv_r[5 + offset, :, :]
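# Hedged sketch of how the CPU send/receive buffers filled above might be
# exchanged between neighboring domains with mpi4py. The communicator and
# neighbor ranks passed in here (`comm`, `left_proc`, `right_proc`) are
# assumptions for illustration, not the code's actual exchange routine.
from mpi4py import MPI

def exchange_EB_buffers_sketch(comm, EB_send_l, EB_send_r,
                               EB_recv_l, EB_recv_r,
                               left_proc, right_proc):
    """Send guard-cell data to the neighbors and receive theirs in return."""
    req = []
    if left_proc is not None:
        req.append(comm.Isend(EB_send_l, dest=left_proc, tag=1))
        req.append(comm.Irecv(EB_recv_l, source=left_proc, tag=2))
    if right_proc is not None:
        req.append(comm.Isend(EB_send_r, dest=right_proc, tag=2))
        req.append(comm.Irecv(EB_recv_r, source=right_proc, tag=1))
    MPI.Request.Waitall(req)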