def copy_J_buffer(self, iz_min, grid):
    """
    Add the small-size arrays Jr_buffer, Jt_buffer, Jz_buffer into
    the full-size arrays Jr, Jt, Jz

    Parameters
    ----------
    iz_min: int
        The z index in the full-size array, that corresponds to index 0
        in the small-size array (i.e. position at which to add the
        small-size array into the full-size one)

    grid: a list of InterpolationGrid objects
        Contains the full-size arrays Jr, Jt, Jz
    """
    # Use isinstance (rather than an exact type comparison), so that
    # subclasses of numpy arrays are also handled by the CPU branch
    if isinstance(grid[0].Jr, np.ndarray):
        # The large-size arrays for J are on the CPU
        # The buffers are added over two cells along z, starting at iz_min
        z_span = slice(iz_min, iz_min + 2)
        for m, mode_grid in enumerate(grid):
            mode_grid.Jr[z_span] += self.Jr_buffer[m]
            mode_grid.Jt[z_span] += self.Jt_buffer[m]
            mode_grid.Jz[z_span] += self.Jz_buffer[m]
    else:
        # The large-size arrays for J are on the GPU
        # Copy the small-size buffers to the GPU
        cuda.to_device(self.Jr_buffer, to=self.d_Jr_buffer)
        cuda.to_device(self.Jt_buffer, to=self.d_Jt_buffer)
        cuda.to_device(self.Jz_buffer, to=self.d_Jz_buffer)
        # On the GPU: add the small-size buffers to the large-size array
        # (only the grids of index 0 and 1 are passed to the kernel)
        dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(grid[0].Nr, TPB=64)
        add_J_to_gpu_array[dim_grid_1d, dim_block_1d](
            iz_min,
            self.d_Jr_buffer, self.d_Jt_buffer, self.d_Jz_buffer,
            grid[0].Jr, grid[1].Jr,
            grid[0].Jt, grid[1].Jt,
            grid[0].Jz, grid[1].Jz)
def sort_particles(self, fld):
    """
    Sort the particles, through the following steps:
    1. Get field cell index
    2. Sort field cell index
    3. Parallel prefix sum
    4. Rearrange particle arrays

    Parameter
    ----------
    fld : a Field object
        Provides the list of InterpolationGrid objects (`fld.interp`)
        whose first element defines the grid geometry. Its attribute
        `prefix_sum_shift` is reset to 0 by this function.
    """
    # The first interpolation grid provides the grid geometry
    mode0 = fld.interp[0]

    # Launch configurations: one sized by the number of particles,
    # the other by the number of grid cells
    ptcl_blocks, ptcl_threads = cuda_tpb_bpg_1d(self.Ntot)
    cell_blocks, cell_threads = cuda_tpb_bpg_1d(mode0.Nz * mode0.Nr)

    # Step 1: cell index of each particle
    # (defined by iz_lower and ir_lower)
    get_cell_idx_per_particle[ptcl_blocks, ptcl_threads](
        self.cell_idx, self.sorted_idx,
        self.x, self.y, self.z,
        mode0.invdz, mode0.zmin, mode0.Nz,
        mode0.invdr, mode0.rmin, mode0.Nr)

    # Step 2: sort the cell index array, and modify sorted_idx accordingly.
    # The value of the sorted_idx array corresponds to the index of the
    # sorted particle in the other particle arrays.
    sort_particles_per_cell(self.cell_idx, self.sorted_idx)

    # Step 3: discard the old prefix sum, then perform the inclusive
    # parallel prefix sum
    fld.prefix_sum_shift = 0
    reset_prefix_sum[cell_blocks, cell_threads](self.prefix_sum)
    incl_prefix_sum[ptcl_blocks, ptcl_threads](
        self.cell_idx, self.prefix_sum)

    # Step 4: reorder the particle arrays
    self.rearrange_particle_arrays()
def rearrange_particle_arrays(self):
    """
    Reorder the particle data arrays so that they match the sorted
    cell index array.

    Each array is rewritten, in the order given by `sorted_idx`, into a
    spare buffer; the array and the buffer are then swapped.
    """
    # Get the threads per block and the blocks per grid
    n_blocks, n_threads = cuda_tpb_bpg_1d(self.Ntot)

    # Float attributes, as (owner object, attribute name) pairs
    float_attrs = [(self, name) for name in
                   ('x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma')]
    if self.ionizer is not None:
        float_attrs.append((self.ionizer, 'neutral_weight'))
    for owner, name in float_attrs:
        unsorted = getattr(owner, name)
        # Write the reordered data into the float buffer
        write_sorting_buffer[n_blocks, n_threads](
            self.sorted_idx, unsorted, self.sorting_buffer)
        # Swap: the buffer becomes the particle array, and the old
        # particle array becomes the buffer for the next attribute
        setattr(owner, name, self.sorting_buffer)
        self.sorting_buffer = unsorted

    # Integer attributes, handled the same way with the integer buffer
    int_attrs = []
    if self.tracker is not None:
        int_attrs.append((self.tracker, 'id'))
    if self.ionizer is not None:
        int_attrs.append((self.ionizer, 'ionization_level'))
    for owner, name in int_attrs:
        unsorted = getattr(owner, name)
        write_sorting_buffer[n_blocks, n_threads](
            self.sorted_idx, unsorted, self.int_sorting_buffer)
        setattr(owner, name, self.int_sorting_buffer)
        self.int_sorting_buffer = unsorted
def generate_new_ids_gpu(self, i_start, i_end):
    """
    Generate new unique ids, and use them to fill the array `id` in place
    from index `i_start` (included) to index `i_end` (excluded)

    Parameters
    ----------
    i_start, i_end: int
        The indices between which new id should be generated

    Raises
    ------
    ValueError
        If `i_end` is smaller than `i_start`
    """
    N = i_end - i_start
    if N < 0:
        # A reversed range would otherwise decrease next_attributed_id
        # (see the update below), and could thus lead to duplicated ids
        raise ValueError(
            "`i_end` (%d) should not be smaller than `i_start` (%d)"
            % (i_end, i_start))
    if N == 0:
        # Empty range: no id to generate, and the id counter is unchanged
        return

    grid_1d, block_1d = cuda_tpb_bpg_1d(N)
    # Modify the array self.id in-place,
    # between the indices i_start and i_end
    generate_ids_gpu[grid_1d, block_1d](
        self.id, i_start, i_end,
        self.next_attributed_id, self.id_step)
    # Update the value of self.next_attributed_id
    self.next_attributed_id += N * self.id_step
def halfpush_x(self):
    """
    Advance the particles' positions over one half-timestep

    This assumes that the positions (x, y, z) are initially either
    one half-timestep *behind* the momenta (ux, uy, uz), or at the
    same timestep as the momenta.
    """
    if not self.use_cuda:
        # CPU version
        push_x_numba(self.x, self.y, self.z,
                     self.ux, self.uy, self.uz,
                     self.inv_gamma, self.Ntot, self.dt)
        return

    # GPU version: get the launch configuration and call the kernel
    n_blocks, n_threads = cuda_tpb_bpg_1d(self.Ntot)
    push_x_gpu[n_blocks, n_threads](
        self.x, self.y, self.z,
        self.ux, self.uy, self.uz,
        self.inv_gamma, self.dt)
    # The particle array is unsorted after the push in x
    self.sorted = False
def push_p(self):
    """
    Advance the particles' momenta over one timestep, using the Vay pusher
    Reference : Vay, Physics of Plasmas 15, 056701 (2008)

    This assumes that the momenta (ux, uy, uz) are initially one
    half-timestep *behind* the positions (x, y, z), and it brings
    them one half-timestep *ahead* of the positions.
    """
    # Leading arguments, common to all the pusher functions
    momenta_and_fields = (self.ux, self.uy, self.uz, self.inv_gamma,
                          self.Ex, self.Ey, self.Ez,
                          self.Bx, self.By, self.Bz)

    # Ionizable species can have a charge that depends on the
    # macroparticle, and hence require a different function, which
    # takes the ionization level instead of the charge q
    ionizable = self.ionizer is not None
    if ionizable:
        trailing = (self.m, self.Ntot, self.dt,
                    self.ionizer.ionization_level)
    else:
        trailing = (self.q, self.m, self.Ntot, self.dt)
    pusher_args = momenta_and_fields + trailing

    if self.use_cuda:
        # Get the threads per block and the blocks per grid
        n_blocks, n_threads = cuda_tpb_bpg_1d(self.Ntot)
        kernel = push_p_ioniz_gpu if ionizable else push_p_gpu
        kernel[n_blocks, n_threads](*pusher_args)
    else:
        pusher = push_p_ioniz_numba if ionizable else push_p_numba
        pusher(*pusher_args)
def extract_slice_from_gpu(pref_sum_curr, N_area, species):
    """
    Extract the particles which have an index between pref_sum_curr
    and pref_sum_curr + N_area, and return them in a dictionary.

    Parameters
    ----------
    pref_sum_curr: int
        The starting index needed for the extraction process

    N_area: int
        The number of particles to extract.

    species: an fbpic Species object
        The species from which to extract data

    Returns
    -------
    particle_data : A dictionary of 1D arrays (that are on the CPU)
        Contains the keys 'x', 'y', 'z', 'ux', 'uy', 'uz', 'w' and
        'inv_gamma', plus 'id' when the species has a tracker and
        'charge' when it has an ionizer. The weights 'w' are divided by
        `species.q`, except for a species with an ionizer, for which 'w'
        holds the extracted `neutral_weight` instead.
    """
    has_tracker = species.tracker is not None
    has_ionizer = species.ionizer is not None

    # Launch configuration of the extraction kernels
    n_blocks, n_threads = cuda_tpb_bpg_1d(N_area)

    # - General particle quantities (one row per quantity)
    d_float_data = cuda.device_array((8, N_area), dtype=np.float64)
    extract_particles_from_gpu[n_blocks, n_threads](
        pref_sum_curr,
        species.x, species.y, species.z,
        species.ux, species.uy, species.uz,
        species.w, species.inv_gamma,
        d_float_data)

    # - Optional particle arrays
    if has_tracker:
        d_ids = cuda.device_array((N_area, ), dtype=np.uint64)
        extract_array_from_gpu[n_blocks, n_threads](
            pref_sum_curr, species.tracker.id, d_ids)
    if has_ionizer:
        d_levels = cuda.device_array((N_area, ), dtype=np.uint64)
        extract_array_from_gpu[n_blocks, n_threads](
            pref_sum_curr, species.ionizer.ionization_level, d_levels)
        d_neutral_w = cuda.device_array((N_area, ), dtype=np.float64)
        extract_array_from_gpu[n_blocks, n_threads](
            pref_sum_curr, species.ionizer.neutral_weight, d_neutral_w)

    # Copy the GPU arrays to the host, and name the rows
    h_float_data = d_float_data.copy_to_host()
    row_names = ('x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma')
    particle_data = dict(zip(row_names, h_float_data))
    # Normalize the weights by the charge of the species
    particle_data['w'] = particle_data['w'] * (1. / species.q)

    if has_tracker:
        particle_data['id'] = d_ids.copy_to_host()
    if has_ionizer:
        particle_data['charge'] = d_levels.copy_to_host()
        # Replace particle weight
        particle_data['w'] = d_neutral_w.copy_to_host()

    return particle_data
def deposit(self, fld, fieldtype):
    """
    Deposit the particles charge or current onto the grid

    This assumes that the particle positions (and momenta in the case of J)
    are currently at the same timestep as the field that is to be deposited

    Parameter
    ----------
    fld : a Field object
        Provides the list of InterpolationGrid objects (`fld.interp`)
        onto which the deposition is done. On GPU, it is also passed to
        `sort_particles` when the particles are not sorted.

    fieldtype : string
        Indicates which field to deposit
        Either 'J' or 'rho'

    Raises
    ------
    ValueError
        If `fieldtype` is neither 'J' nor 'rho', or (on GPU only) if
        `self.particle_shape` is not one of 'linear',
        'linear_non_atomic' or 'cubic'.
    """
    # Fail fast on an invalid field type, before sorting the particles
    # or allocating any helper array
    if fieldtype not in ('rho', 'J'):
        raise ValueError(
            "`fieldtype` should be either 'J' or 'rho', but is `%s`"
            % fieldtype)

    # Shortcut for the list of InterpolationGrid objects
    grid = fld.interp

    if self.use_cuda:
        shape = self.particle_shape
        # Only these three shapes have a GPU deposition kernel below
        if shape not in ('linear', 'linear_non_atomic', 'cubic'):
            raise ValueError(
                "`particle_shape` should be either 'linear', "
                "'linear_non_atomic' or 'cubic' but is `%s`" % shape)

        # The GPU kernels are written for mode 0 and 1 only
        g0, g1 = grid[0], grid[1]

        # Get the threads per block and the blocks per grid
        dim_grid_2d_flat, dim_block_2d_flat = \
            cuda_tpb_bpg_1d(g0.Nz * g0.Nr)
        dim_grid_2d, dim_block_2d = cuda_tpb_bpg_2d(g0.Nz, g0.Nr)

        # Create the helper arrays for deposition
        if shape == 'linear_non_atomic':
            d_F0, d_F1, d_F2, d_F3 = cuda_deposition_arrays(
                g0.Nz, g0.Nr, fieldtype=fieldtype)

        # Sort the particles
        if not self.sorted:
            self.sort_particles(fld=fld)
            # The particles are now sorted and rearranged
            self.sorted = True

        if fieldtype == 'rho':
            if shape == 'linear_non_atomic':
                # Deposit rho in each of four directions
                deposit_rho_gpu[dim_grid_2d_flat, dim_block_2d_flat](
                    self.x, self.y, self.z, self.w,
                    g0.invdz, g0.zmin, g0.Nz,
                    g0.invdr, g0.rmin, g0.Nr,
                    d_F0, d_F1, d_F2, d_F3,
                    self.cell_idx, self.prefix_sum)
                # Add the four directions together
                add_rho[dim_grid_2d, dim_block_2d](
                    g0.rho, g1.rho,
                    d_F0, d_F1, d_F2, d_F3)
            elif shape == 'cubic':
                deposit_rho_gpu_cubic[dim_grid_2d_flat, dim_block_2d_flat](
                    self.x, self.y, self.z, self.w,
                    g0.invdz, g0.zmin, g0.Nz,
                    g0.invdr, g0.rmin, g0.Nr,
                    g0.rho, g1.rho,
                    self.cell_idx, self.prefix_sum)
            else:
                # shape == 'linear' (guaranteed by the check above)
                deposit_rho_gpu_linear[dim_grid_2d_flat, dim_block_2d_flat](
                    self.x, self.y, self.z, self.w,
                    g0.invdz, g0.zmin, g0.Nz,
                    g0.invdr, g0.rmin, g0.Nr,
                    g0.rho, g1.rho,
                    self.cell_idx, self.prefix_sum)
        else:
            # fieldtype == 'J' (guaranteed by the check above)
            if shape == 'linear_non_atomic':
                # Deposit J in each of four directions
                deposit_J_gpu[dim_grid_2d_flat, dim_block_2d_flat](
                    self.x, self.y, self.z, self.w,
                    self.ux, self.uy, self.uz, self.inv_gamma,
                    g0.invdz, g0.zmin, g0.Nz,
                    g0.invdr, g0.rmin, g0.Nr,
                    d_F0, d_F1, d_F2, d_F3,
                    self.cell_idx, self.prefix_sum)
                # Add the four directions together
                add_J[dim_grid_2d, dim_block_2d](
                    g0.Jr, g1.Jr,
                    g0.Jt, g1.Jt,
                    g0.Jz, g1.Jz,
                    d_F0, d_F1, d_F2, d_F3)
            elif shape == 'cubic':
                deposit_J_gpu_cubic[dim_grid_2d_flat, dim_block_2d_flat](
                    self.x, self.y, self.z, self.w,
                    self.ux, self.uy, self.uz, self.inv_gamma,
                    g0.invdz, g0.zmin, g0.Nz,
                    g0.invdr, g0.rmin, g0.Nr,
                    g0.Jr, g1.Jr,
                    g0.Jt, g1.Jt,
                    g0.Jz, g1.Jz,
                    self.cell_idx, self.prefix_sum)
            else:
                # shape == 'linear' (guaranteed by the check above)
                deposit_J_gpu_linear[dim_grid_2d_flat, dim_block_2d_flat](
                    self.x, self.y, self.z, self.w,
                    self.ux, self.uy, self.uz, self.inv_gamma,
                    g0.invdz, g0.zmin, g0.Nz,
                    g0.invdr, g0.rmin, g0.Nr,
                    g0.Jr, g1.Jr,
                    g0.Jt, g1.Jt,
                    g0.Jz, g1.Jz,
                    self.cell_idx, self.prefix_sum)
        return

    # CPU version
    # Preliminary arrays for the cylindrical conversion
    r = np.sqrt(self.x**2 + self.y**2)
    # Avoid division by 0.
    invr = 1. / np.where(r != 0., r, 1.)
    cos = np.where(r != 0., self.x * invr, 1.)
    sin = np.where(r != 0., self.y * invr, 0.)

    # Indices and weights
    # (on CPU, any shape other than 'cubic' is deposited with order 1)
    if self.particle_shape == 'cubic':
        shape_order = 3
    else:
        shape_order = 1
    iz, Sz = weights(self.z, grid[0].invdz, grid[0].zmin, grid[0].Nz,
                     direction='z', shape_order=shape_order)
    ir, Sr = weights(r, grid[0].invdr, grid[0].rmin, grid[0].Nr,
                     direction='r', shape_order=shape_order)

    # Number of modes considered :
    # number of elements in the grid list
    Nm = len(grid)

    # Quantities to deposit, as (per-particle amplitude, name of the
    # target array in each InterpolationGrid) pairs
    if fieldtype == 'rho':
        sources = ((self.w, 'rho'),)
    else:
        # Calculate the currents
        Jr = self.w * c * self.inv_gamma * (cos * self.ux + sin * self.uy)
        Jt = self.w * c * self.inv_gamma * (cos * self.uy - sin * self.ux)
        Jz = self.w * c * self.inv_gamma * self.uz
        sources = ((Jr, 'Jr'), (Jt, 'Jt'), (Jz, 'Jz'))

    # -------------------------
    # Deposit mode by mode
    # -------------------------
    # Prepare auxiliary matrix
    exptheta = np.ones(self.Ntot, dtype='complex')
    # exptheta takes the value exp(im theta) throughout the loop
    for m in range(Nm):
        # Increment exptheta (notice the + : forward transform)
        if m == 1:
            exptheta[:].real = cos
            exptheta[:].imag = sin
        elif m > 1:
            exptheta[:] = exptheta * (cos + 1.j * sin)
        # Deposit the fields
        # (The sign -1 with which the guards are added is not
        # trivial to derive but avoids artifacts on the axis)
        for amplitude, name in sources:
            deposit_field_numba(amplitude * exptheta,
                                getattr(grid[m], name),
                                iz, ir, Sz, Sr, -1.)
def gather(self, grid):
    """
    Gather the fields onto the macroparticles

    This assumes that the particle positions are currently at
    the same timestep as the field that is to be gathered.

    The results are written in place into the particle arrays
    Ex, Ey, Ez, Bx, By, Bz.

    Parameter
    ----------
    grid : a list of InterpolationGrid objects
         (one InterpolationGrid object per azimuthal mode)
         Contains the field values on the interpolation grid
    """
    if self.use_cuda:
        # Get the threads per block and the blocks per grid
        dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(self.Ntot)
        # Select the CUDA kernel for the gathering of E and B fields
        # (both kernels take the same arguments, for mode 0 and 1 only)
        if self.particle_shape == 'cubic':
            gather_kernel = gather_field_gpu_cubic
        else:
            gather_kernel = gather_field_gpu_linear
        gather_kernel[dim_grid_1d, dim_block_1d](
            self.x, self.y, self.z,
            grid[0].invdz, grid[0].zmin, grid[0].Nz,
            grid[0].invdr, grid[0].rmin, grid[0].Nr,
            grid[0].Er, grid[0].Et, grid[0].Ez,
            grid[1].Er, grid[1].Et, grid[1].Ez,
            grid[0].Br, grid[0].Bt, grid[0].Bz,
            grid[1].Br, grid[1].Bt, grid[1].Bz,
            self.Ex, self.Ey, self.Ez,
            self.Bx, self.By, self.Bz)
        return

    # CPU version
    # Preliminary arrays for the cylindrical conversion
    r = np.sqrt(self.x**2 + self.y**2)
    # Avoid division by 0.
    invr = 1. / np.where(r != 0., r, 1.)
    cos = np.where(r != 0., self.x * invr, 1.)
    sin = np.where(r != 0., self.y * invr, 0.)

    # Indices and weights
    if self.particle_shape == 'cubic':
        shape_order = 3
    else:
        shape_order = 1
    iz, Sz = weights(self.z, grid[0].invdz, grid[0].zmin, grid[0].Nz,
                     direction='z', shape_order=shape_order)
    ir, Sr = weights(r, grid[0].invdr, grid[0].rmin, grid[0].Nr,
                     direction='r', shape_order=shape_order)

    # Number of modes considered :
    # number of elements in the grid list
    Nm = len(grid)

    # Auxiliary arrays, reused for the E and the B field
    Ft = np.zeros(self.Ntot)
    Fr = np.zeros(self.Ntot)
    exptheta = np.ones(self.Ntot, dtype='complex')

    # The E field, then the B field, are gathered in the same way:
    # (names of the cylindrical components in each InterpolationGrid,
    #  Cartesian particle arrays that receive the result)
    field_sets = (
        ('Er', 'Et', 'Ez', self.Ex, self.Ey, self.Ez),
        ('Br', 'Bt', 'Bz', self.Bx, self.By, self.Bz),
    )
    for name_r, name_t, name_z, Fx, Fy, Fz in field_sets:
        # Zero the previous fields and the auxiliary arrays
        Fx[:] = 0.
        Fy[:] = 0.
        Fz[:] = 0.
        Ft[:] = 0.
        Fr[:] = 0.
        exptheta[:] = 1.
        # exptheta takes the value exp(-im theta) throughout the loop
        for m in range(Nm):
            # Increment exptheta (notice the - : backward transform)
            if m == 1:
                exptheta[:].real = cos
                exptheta[:].imag = -sin
            elif m > 1:
                exptheta[:] = exptheta * (cos - 1.j * sin)
            # Gather the fields
            # (The sign with which the guards are added
            # depends on whether the fields should be zero on axis)
            guard_sign = (-1.)**m
            gather_field_numba(exptheta, m, getattr(grid[m], name_r), Fr,
                               iz, ir, Sz, Sr, -guard_sign)
            gather_field_numba(exptheta, m, getattr(grid[m], name_t), Ft,
                               iz, ir, Sz, Sr, -guard_sign)
            gather_field_numba(exptheta, m, getattr(grid[m], name_z), Fz,
                               iz, ir, Sz, Sr, guard_sign)
        # Convert to Cartesian coordinates
        Fx[:] = cos * Fr - sin * Ft
        Fy[:] = sin * Fr + cos * Ft
def handle_ionization_gpu(self, ion):
    """
    Handle ionization on the GPU:
    - For each ion macroparticle, decide whether it is going to
      be further ionized during this timestep, based on the ADK rate.
    - Add the electrons created from ionization to the `target_species`

    Parameters:
    -----------
    ion: an fbpic.Particles object
        The ionizable species, from which new electrons are created.
    """
    n_ions = ion.Ntot
    # Process particles in batches (of typically 10, 20 particles)
    N_batch = int(n_ions / self.batch_size) + 1
    batch_blocks, batch_threads = cuda_tpb_bpg_1d(N_batch)

    # Temporary arrays: per-ion ionization flag, per-batch electron count
    # and per-ion random numbers
    d_is_ionized = cuda.device_array((n_ions, ), dtype=np.int16)
    d_n_ionized = cuda.device_array((N_batch, ), dtype=np.int64)
    d_random = cuda.device_array((n_ions, ), dtype=np.float32)
    self.prng.uniform(d_random)

    # Ionize the ions (one thread per batch)
    ionize_ions_cuda[batch_blocks, batch_threads](
        N_batch, self.batch_size, n_ions, self.level_max,
        d_n_ionized, d_is_ionized, self.ionization_level, d_random,
        self.adk_prefactor, self.adk_power, self.adk_exp_prefactor,
        ion.ux, ion.uy, ion.uz,
        ion.Ex, ion.Ey, ion.Ez,
        ion.Bx, ion.By, ion.Bz,
        ion.w, self.neutral_weight)

    # Count the total number of electrons (operation performed
    # on the CPU, as this is typically difficult on the GPU)
    h_n_ionized = d_n_ionized.copy_to_host()
    h_cumulative = np.zeros(len(h_n_ionized) + 1, dtype=np.int64)
    np.cumsum(h_n_ionized, out=h_cumulative[1:])
    n_created = h_cumulative[-1]
    # If no new particle was created, skip the rest of this function
    if n_created == 0:
        return

    # Reallocate the electron species, in order to
    # accommodate the electrons produced by ionization
    elec = self.target_species
    old_Ntot = elec.Ntot
    new_Ntot = old_Ntot + n_created
    # (one thread per pre-existing electron)
    ptcl_blocks, ptcl_threads = cuda_tpb_bpg_1d(old_Ntot)

    def enlarged_copy(old_array, dtype):
        # Allocate an array of size new_Ntot, and copy the
        # old_Ntot pre-existing values into it
        new_array = cuda.device_array((new_Ntot, ), dtype=dtype)
        copy_particle_data_cuda[ptcl_blocks, ptcl_threads](
            old_Ntot, old_array, new_array)
        return new_array

    float_attrs = ('x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma',
                   'Ex', 'Ey', 'Ez', 'Bx', 'By', 'Bz')
    for name in float_attrs:
        setattr(elec, name, enlarged_copy(getattr(elec, name), np.float64))
    if elec.tracker is not None:
        elec.tracker.id = enlarged_copy(elec.tracker.id, np.uint64)

    # Allocate the auxiliary arrays
    elec.cell_idx = cuda.device_array((new_Ntot, ), dtype=np.int32)
    elec.sorted_idx = cuda.device_array((new_Ntot, ), dtype=np.uint32)
    elec.sorting_buffer = cuda.device_array((new_Ntot, ), dtype=np.float64)
    if elec.n_integer_quantities > 0:
        elec.int_sorting_buffer = \
            cuda.device_array((new_Ntot, ), dtype=np.uint64)
    # Modify the total number of electrons
    elec.Ntot = new_Ntot

    # Send the cumulative count back to the GPU
    d_cumulative = cuda.to_device(h_cumulative)
    # Copy the new electrons from ionization (one thread per batch)
    copy_ionized_electrons_cuda[batch_blocks, batch_threads](
        N_batch, self.batch_size, old_Ntot, n_ions,
        d_cumulative, d_is_ionized,
        elec.x, elec.y, elec.z, elec.inv_gamma,
        elec.ux, elec.uy, elec.uz, elec.w,
        elec.Ex, elec.Ey, elec.Ez,
        elec.Bx, elec.By, elec.Bz,
        ion.x, ion.y, ion.z, ion.inv_gamma,
        ion.ux, ion.uy, ion.uz, self.neutral_weight,
        ion.Ex, ion.Ey, ion.Ez,
        ion.Bx, ion.By, ion.Bz)
    # The electron arrays are not sorted anymore
    elec.sorted = False

    # If the electrons are tracked, generate new ids
    if elec.tracker is not None:
        elec.tracker.generate_new_ids_gpu(old_Ntot, new_Ntot)
def extract_slice(self, fld, comm, z_boost, zmin_boost, slice_array):
    """
    Fills `slice_array` with a slice of the fields at z_boost
    (the fields returned are still in the boosted frame ;
    for performance, the Lorentz transform of the fields values
    is performed only when flushing to disk)

    Parameters
    ----------
    fld: a Fields object
        The object from which to extract the fields

    comm: a BoundaryCommunicator object, or None
        Contains information about the guard cells in particular.
        When it is None, no guard cells are added to the slice index.

    z_boost: float (meters)
        Position of the slice in the boosted frame

    zmin_boost: float (meters)
        Position of the left end of physical part of the local subdomain
        (i.e. excludes guard cells)

    slice_array: either a numpy array or a cuda device array
        A pre-allocated array of reals that packs together the slices
        of the different fields (on the CPU when `fld.use_cuda` is
        false, on the GPU otherwise). It is filled in place.
        The first index of this array corresponds to the field type
        (10 different field types), and the correspondence
        between the field type and integer index is given by
        field_to_index
        The shape of this array is (10, 2*Nm-1, Nr)
    """
    # Find the index of the slice in the boosted frame
    # and the corresponding interpolation shape factor
    dz = fld.interp[0].dz
    # Find the interpolation data in the z direction
    z_staggered_gridunits = (z_boost - zmin_boost - 0.5 * dz) / dz
    iz = int(z_staggered_gridunits)
    Sz = iz + 1 - z_staggered_gridunits
    # Add the guard cells to the index iz
    if comm is not None:
        iz += comm.n_guard

    # Truthiness test (rather than `is False`), so that a NumPy boolean
    # false also selects the CPU path
    if not fld.use_cuda:
        # Extract a slice of the fields *in the boosted frame*
        # at z_boost, using interpolation, and store them in
        # the pre-allocated CPU array slice_array
        self.extract_slice_cpu(fld, iz, Sz, slice_array)
        return

    # Extract the slice on the GPU
    # Fill the pre-allocated GPU array slice_array
    interp = fld.interp
    Nr = fld.Nr
    dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(Nr)
    # The kernel fills slice_array in place; the value returned by the
    # launch is not used
    extract_slice_cuda[dim_grid_1d, dim_block_1d](
        Nr, iz, Sz, slice_array,
        interp[0].Er, interp[0].Et, interp[0].Ez,
        interp[0].Br, interp[0].Bt, interp[0].Bz,
        interp[0].Jr, interp[0].Jt, interp[0].Jz, interp[0].rho,
        interp[1].Er, interp[1].Et, interp[1].Ez,
        interp[1].Br, interp[1].Bt, interp[1].Bz,
        interp[1].Jr, interp[1].Jt, interp[1].Jz, interp[1].rho)