def add_J_to_gpu_array(iz_min, Jr_buffer, Jt_buffer, Jz_buffer,
                       Jr, Jt, Jz, m):
    """
    Accumulate the small current buffers into the full-size current arrays.

    For the azimuthal mode `m`, the two z-rows stored in each buffer are
    added to the rows `iz_min` and `iz_min + 1` of the matching full-size
    array. The radial index handled by a given call is obtained from
    `cuda.grid(1)`, i.e. one CUDA thread per radial cell.

    Parameters
    ----------
    iz_min: int
        Index along z of the row that receives buffer row 0
        (buffer row 1 is added at `iz_min + 1`)

    Jr_buffer, Jt_buffer, Jz_buffer: 3darrays of complexs
        Arrays of shape (Nm, 2, Nr) that store the values of the current
        components in the 2 cells that surround the antenna
        (for each mode)

    Jr, Jt, Jz: 2darrays of complexs
        Arrays of shape (Nz, Nr) that contain the current components
        in the mode m. Modified in place.

    m: int
        The index of the azimuthal mode involved
    """
    # One thread per radial cell
    ir = cuda.grid(1)

    # Threads that fall outside of the radial extent do nothing
    if ir >= Jr.shape[1]:
        return

    iz_next = iz_min + 1

    # Radial component
    Jr[iz_min, ir] += Jr_buffer[m, 0, ir]
    Jr[iz_next, ir] += Jr_buffer[m, 1, ir]

    # Azimuthal component
    Jt[iz_min, ir] += Jt_buffer[m, 0, ir]
    Jt[iz_next, ir] += Jt_buffer[m, 1, ir]

    # Longitudinal component
    Jz[iz_min, ir] += Jz_buffer[m, 0, ir]
    Jz[iz_next, ir] += Jz_buffer[m, 1, ir]
def add_rho_to_gpu_array(iz_min, rho_buffer, rho, m):
    """
    Accumulate the small buffer `rho_buffer` into the full-size array `rho`.

    For the azimuthal mode `m`, the two z-rows of the buffer are added to
    the rows `iz_min` and `iz_min + 1` of `rho`. The radial index handled
    by a given call is obtained from `cuda.grid(1)`, i.e. one CUDA thread
    per radial cell.

    Parameters
    ----------
    iz_min: int
        The index of the lowest cell in z that surrounds the antenna

    rho_buffer: 3darray of complexs
        Array of shape (Nm, 2, Nr) that stores the values of rho
        in the 2 cells that surround the antenna (for each mode).

    rho: 2darray of complexs
        Array of shape (Nz, Nr) that contains rho in the mode m.
        Modified in place.

    m: int
        The index of the azimuthal mode involved
    """
    # One thread per radial cell
    ir = cuda.grid(1)

    # Threads that fall outside of the radial extent do nothing
    if ir >= rho.shape[1]:
        return

    # Lower cell, then upper cell
    rho[iz_min, ir] += rho_buffer[m, 0, ir]
    rho[iz_min + 1, ir] += rho_buffer[m, 1, ir]
def shift_spect_array_gpu(field_array, shift_factor, n_move):
    """
    Shift the field `field_array` by `n_move` cells, in spectral space.

    Each element `field_array[iz, ir]` is multiplied by
    `shift_factor[iz]` raised to the power `n_move`, which corresponds to
    multiplying the fields by exp(i*kz_true*dz)**n_move. The (iz, ir)
    pair handled by a given call is obtained from `cuda.grid(2)`.

    Parameters
    ----------
    field_array: 2darray of complexs
        Contains the value of the fields, and is modified by
        this function

    shift_factor: 1darray of complexs
        Contains the shift array, that is multiplied to the fields in
        spectral space to shift them by one cell in spatial space
        ( exp(i*kz_true*dz) )

    n_move: int
        The number of cells by which the grid should be shifted
        (may be negative)
    """
    # One thread per (iz, ir) element
    iz, ir = cuda.grid(2)

    # Threads that fall outside of the array do nothing
    if iz >= field_array.shape[0] or ir >= field_array.shape[1]:
        return

    # Build shift_factor[iz]**|n_move| by repeated multiplication
    power_shift = 1. + 0.j
    remaining = abs(n_move)
    while remaining > 0:
        power_shift *= shift_factor[iz]
        remaining -= 1

    # shift_factor is of the form e^{i k dz}: a negative power is
    # therefore obtained by taking the complex conjugate
    if n_move < 0:
        power_shift = power_shift.conjugate()

    # Apply the shift
    field_array[iz, ir] *= power_shift
def extract_array_from_gpu(part_idx_start, array, selected):
    """
    Copy a contiguous range of `array` into the 1D array `selected`.

    The copied range starts at `part_idx_start` and contains
    `selected.shape[0]` elements (i.e. it ends at index
    `part_idx_start + N_part - 1`, with N_part the length of `selected`).
    The element handled by a given call is obtained from `cuda.grid(1)`.

    Parameters
    ----------
    part_idx_start : int
        The starting index needed for the extraction process.
        ( minimum particle index to be extracted )

    array : 1D arrays of ints or floats
        The GPU particle arrays for a given species. (e.g. particle id)

    selected : 1D array of ints or floats
        A GPU array of length N_part, that is filled with the
        extracted particles.
    """
    # One thread per extracted element
    i_out = cuda.grid(1)

    # Threads beyond the length of the output array do nothing
    if i_out >= selected.shape[0]:
        return

    selected[i_out] = array[part_idx_start + i_out]
def copy_particle_data_cuda(Ntot, old_array, new_array):
    """
    Copy the first `Ntot` elements of `old_array` into `new_array`, on GPU

    The element handled by a given call is obtained from `cuda.grid(1)`.

    Parameters
    ----------
    Ntot: int
        The number of elements to copy

    old_array: 1d device array
        The array from which the elements are read

    new_array: 1d device array
        The array into which the elements are written (same indices
        as in `old_array`)
    """
    # One thread per particle
    ip = cuda.grid(1)

    # Threads beyond the number of particles do nothing
    if ip >= Ntot:
        return

    new_array[ip] = old_array[ip]
def split_particles_to_buffers(particle_array, left_buffer,
                               stay_buffer, right_buffer, i_min, i_max):
    """
    Split the (sorted) particle array into the three arrays left_buffer,
    stay_buffer and right_buffer (in the same order)

    The particle handled by a given call is obtained from `cuda.grid(1)`.

    Parameters
    ----------
    particle_array: 1d device arrays of floats
        Original array of particles
        (represents *one* of the particle quantities)

    left_buffer, right_buffer: 1d device arrays of floats
        Will contain the particles that are outside of the physical domain
        Note: if the boundary is open, then these buffers have size 0
        and in this case, they will not be filled
        (the corresponding particles are simply lost)

    stay_buffer: 1d device array of floats
        Will contain the particles that are inside the physical domain

    i_min, i_max: int
        Indices of particle_array between which particles are kept
        (and thus copied to stay_buffer). The particles below i_min
        (resp. at or above i_max) are copied to left_buffer
        (resp. right_buffer)
    """
    # One thread per particle
    ip = cuda.grid(1)

    if ip >= i_min and ip < i_max:
        # Particle that remains in the physical domain
        stay_buffer[ip - i_min] = particle_array[ip]
    elif ip < i_min:
        # Particle that leaves on the left side: a buffer of size 0
        # means that the boundary is open (the particle is dropped)
        if left_buffer.shape[0] != 0:
            left_buffer[ip] = particle_array[ip]
    elif ip < particle_array.shape[0]:
        # Particle that leaves on the right side: a buffer of size 0
        # means that the boundary is open (the particle is dropped)
        if right_buffer.shape[0] != 0:
            right_buffer[ip - i_max] = particle_array[ip]
def cuda_damp_pml_EB(Et, Et_pml, Ez, Bt, Bt_pml, Bz, damp_array, n_pml):
    """
    Damp the E and B fields in the PML cells (i.e. the last n_pml cells
    in r), in an anisotropic manner which is given by the PML principles

    The (iz, i_pml) pair handled by a given call is obtained from
    `cuda.grid(2)`, where `i_pml` counts the cells inside the PML region.

    Parameters
    ----------
    Et, Et_pml, Ez, Bt, Bt_pml, Bz : 2darrays of complexs
        Contain the fields to be damped
        The first axis corresponds to z and the second to r
        Modified in place.

    damp_array: 1darray of floats
        Contains the damping factors; it is indexed by the position
        of the cell inside the PML (from 0 to n_pml - 1)

    n_pml: int
        Number of PML cells
    """
    # One thread per (z cell, PML cell) pair
    iz, i_pml = cuda.grid(2)

    # Size of the field arrays along z and r
    Nz, Nr = Et.shape

    # Threads that fall outside of the PML region do nothing
    if i_pml >= n_pml or iz >= Nz:
        return

    # Radial index of this PML cell in the full field arrays
    ir = Nr - n_pml + i_pml
    damp_factor = damp_array[i_pml]

    # Theta component of E: remove the PML part from the regular field,
    # damp the PML part, and then add it back
    Et[iz, ir] -= Et_pml[iz, ir]
    Et_pml[iz, ir] *= damp_factor
    Et[iz, ir] += Et_pml[iz, ir]

    # Theta component of B: same procedure
    Bt[iz, ir] -= Bt_pml[iz, ir]
    Bt_pml[iz, ir] *= damp_factor
    Bt[iz, ir] += Bt_pml[iz, ir]

    # The z components are damped directly
    Ez[iz, ir] *= damp_factor
    Bz[iz, ir] *= damp_factor
def shift_particles_periodic_cuda(z, zmin, zmax):
    """
    Shift the particle positions by an integer number of box length,
    so that outside particle are back inside the physical domain

    After the call, each position satisfies zmin <= z < zmax. The
    particle handled by a given call is obtained from `cuda.grid(1)`.

    Parameters
    ----------
    z: 1darray of floats
        The z position of the particles (one element per particle)
        Modified in place.

    zmin, zmax: floats
        Positions of the edges of the periodic box
    """
    # One thread per particle
    ip = cuda.grid(1)

    # Threads beyond the number of particles do nothing
    if ip >= z.shape[0]:
        return

    box_length = zmax - zmin

    # Bring back the particles that are beyond the upper edge
    while z[ip] >= zmax:
        z[ip] -= box_length

    # Bring back the particles that are below the lower edge
    while z[ip] < zmin:
        z[ip] += box_length
def copy_particles(N_elements, source_array, source_start,
                   target_array, target_start):
    """
    Copy `N_elements` elements from `source_array` to `target_array`

    Element `source_start + k` of the source is written to element
    `target_start + k` of the target, for k from 0 to N_elements - 1.
    The value of k handled by a given call is obtained from
    `cuda.grid(1)`.

    Parameters
    ----------
    N_elements: int
        The number of elements to copy

    source_array, target_array: 1d device arrays of floats
        The arrays from/to which the data should be copied
        (represents *one* of the particle quantities)

    source_start, target_start: ints
        The indices at which to start the copy, in both the source
        and the target.
    """
    # One thread per copied element
    offset = cuda.grid(1)

    # Threads beyond the number of elements do nothing
    if offset >= N_elements:
        return

    target_array[target_start + offset] = source_array[source_start + offset]
def extract_particles_from_gpu(part_idx_start, x, y, z, ux, uy, uz, w,
                               inv_gamma, selected):
    """
    Copy a contiguous range of particles into the 2D array `selected`.

    The rows of `selected` are filled in the following order:
    x, y, z, ux, uy, uz, w, inv_gamma.
    The copied range starts at `part_idx_start` and contains
    `selected.shape[1]` particles (i.e. it ends at index
    `part_idx_start + N_part - 1`). The particle handled by a given call
    is obtained from `cuda.grid(1)`.

    Parameters
    ----------
    part_idx_start : int
        The starting index needed for the extraction process.
        ( minimum particle index to be extracted )

    x, y, z, ux, uy, uz, w, inv_gamma : 1D arrays of floats
        The GPU particle arrays for a given species.

    selected : 2D array of floats
        A GPU array of shape (8, N_part), that is filled with the
        extracted particles.
    """
    # One thread per extracted particle
    i_out = cuda.grid(1)

    # Threads beyond the number of extracted particles do nothing
    if i_out >= selected.shape[1]:
        return

    # Index of the particle in the full-size arrays
    i_src = part_idx_start + i_out

    # Positions
    selected[0, i_out] = x[i_src]
    selected[1, i_out] = y[i_src]
    selected[2, i_out] = z[i_src]
    # Momenta
    selected[3, i_out] = ux[i_src]
    selected[4, i_out] = uy[i_src]
    selected[5, i_out] = uz[i_src]
    # Weight and inverse Lorentz factor
    selected[6, i_out] = w[i_src]
    selected[7, i_out] = inv_gamma[i_src]
def extract_slice_cuda(Nr, iz, Sz, slice_arr,
                       Er, Et, Ez, Br, Bt, Bz, Jr, Jt, Jz, rho, m):
    """
    Extract a slice of the fields, interpolated between `iz` and `iz + 1`

    The fields at iz are weighted by Sz and those at iz + 1 by (1 - Sz).
    For m = 0, only the real part is stored (at index 0 along the second
    axis of `slice_arr`). For m > 0, the real part is stored at index
    2*m - 1 and the imaginary part at index 2*m, both multiplied by 2.
    The radial index handled by a given call is obtained from
    `cuda.grid(1)`.

    Parameters
    ----------
    Nr: int
        Number of cells transversally

    iz: int
        Index at which to extract the fields

    Sz: float
        Interpolation shape factor used at iz

    slice_arr: cupy.empty
        Array of floats of shape (10, 2*Nm-1, Nr), filled in the order
        Er, Et, Ez, Br, Bt, Bz, Jr, Jt, Jz, rho along the first axis

    Er, Et, etc...: cupy.empty
        Array of complexs of shape (Nz, Nr), for the azimuthal mode m

    m: int
        Index of the azimuthal mode involved
    """
    # One thread per radial position
    ir = cuda.grid(1)

    # Threads that fall outside of the radial extent do nothing
    if ir >= Nr:
        return

    # Upper cell, and interpolation weights for the lower/upper cells
    iz_up = iz + 1
    w_lo = Sz
    w_hi = 1. - Sz

    # Index at which the real part of the mode is stored in `slice_arr`
    i_re = 0
    if m > 0:
        # For the higher modes, there is a factor 2 so as to comply with
        # the convention in Lifschitz et al., which is also the convention
        # of FBPIC. For performance, it is included in the weights.
        w_lo = 2 * w_lo
        w_hi = 2 * w_hi
        i_re = 2 * m - 1

    # Real part
    slice_arr[0, i_re, ir] = \
        w_lo * Er[iz, ir].real + w_hi * Er[iz_up, ir].real
    slice_arr[1, i_re, ir] = \
        w_lo * Et[iz, ir].real + w_hi * Et[iz_up, ir].real
    slice_arr[2, i_re, ir] = \
        w_lo * Ez[iz, ir].real + w_hi * Ez[iz_up, ir].real
    slice_arr[3, i_re, ir] = \
        w_lo * Br[iz, ir].real + w_hi * Br[iz_up, ir].real
    slice_arr[4, i_re, ir] = \
        w_lo * Bt[iz, ir].real + w_hi * Bt[iz_up, ir].real
    slice_arr[5, i_re, ir] = \
        w_lo * Bz[iz, ir].real + w_hi * Bz[iz_up, ir].real
    slice_arr[6, i_re, ir] = \
        w_lo * Jr[iz, ir].real + w_hi * Jr[iz_up, ir].real
    slice_arr[7, i_re, ir] = \
        w_lo * Jt[iz, ir].real + w_hi * Jt[iz_up, ir].real
    slice_arr[8, i_re, ir] = \
        w_lo * Jz[iz, ir].real + w_hi * Jz[iz_up, ir].real
    slice_arr[9, i_re, ir] = \
        w_lo * rho[iz, ir].real + w_hi * rho[iz_up, ir].real

    # The mode 0 has no imaginary part to store
    if m > 0:
        # Imaginary part, stored right after the real part
        i_im = i_re + 1
        slice_arr[0, i_im, ir] = \
            w_lo * Er[iz, ir].imag + w_hi * Er[iz_up, ir].imag
        slice_arr[1, i_im, ir] = \
            w_lo * Et[iz, ir].imag + w_hi * Et[iz_up, ir].imag
        slice_arr[2, i_im, ir] = \
            w_lo * Ez[iz, ir].imag + w_hi * Ez[iz_up, ir].imag
        slice_arr[3, i_im, ir] = \
            w_lo * Br[iz, ir].imag + w_hi * Br[iz_up, ir].imag
        slice_arr[4, i_im, ir] = \
            w_lo * Bt[iz, ir].imag + w_hi * Bt[iz_up, ir].imag
        slice_arr[5, i_im, ir] = \
            w_lo * Bz[iz, ir].imag + w_hi * Bz[iz_up, ir].imag
        slice_arr[6, i_im, ir] = \
            w_lo * Jr[iz, ir].imag + w_hi * Jr[iz_up, ir].imag
        slice_arr[7, i_im, ir] = \
            w_lo * Jt[iz, ir].imag + w_hi * Jt[iz_up, ir].imag
        slice_arr[8, i_im, ir] = \
            w_lo * Jz[iz, ir].imag + w_hi * Jz[iz_up, ir].imag
        slice_arr[9, i_im, ir] = \
            w_lo * rho[iz, ir].imag + w_hi * rho[iz_up, ir].imag