Python cuda_tpb_bpg_1d примеры, fbpic.cuda_utils.cuda_tpb_bpg_1d Python примеры использования

Пример #1

0

Показать файл

    def copy_J_buffer(self, iz_min, grid):
        """
        Accumulate the small current buffers into the full-size current arrays.

        The buffers Jr_buffer, Jt_buffer and Jz_buffer are added to the
        arrays Jr, Jt and Jz of `grid`, over the two z indices that start
        at `iz_min`.

        Parameters
        ----------
        iz_min: int
            Index along z, in the full-size arrays, that is matched with
            index 0 of the small buffers (i.e. where the addition starts)

        grid: a list of InterpolationGrid objects
            Holds the full-size arrays Jr, Jt, Jz
        """
        full_arrays_on_cpu = type(grid[0].Jr) is np.ndarray

        if not full_arrays_on_cpu:
            # The full-size arrays are on the GPU: first send the host
            # buffers into their pre-allocated device counterparts
            cuda.to_device(self.Jr_buffer, to=self.d_Jr_buffer)
            cuda.to_device(self.Jt_buffer, to=self.d_Jt_buffer)
            cuda.to_device(self.Jz_buffer, to=self.d_Jz_buffer)
            # Then let a kernel perform the addition on the device
            # (only grid[0] and grid[1] are handed to the kernel)
            launch_grid, launch_block = cuda_tpb_bpg_1d(grid[0].Nr, TPB=64)
            add_J_to_gpu_array[launch_grid, launch_block](
                iz_min,
                self.d_Jr_buffer, self.d_Jt_buffer, self.d_Jz_buffer,
                grid[0].Jr, grid[1].Jr,
                grid[0].Jt, grid[1].Jt,
                grid[0].Jz, grid[1].Jz)
            return

        # The full-size arrays are on the CPU: add the buffers in place,
        # one entry of `grid` at a time
        z_span = slice(iz_min, iz_min + 2)
        for m, mode_grid in enumerate(grid):
            mode_grid.Jr[z_span] += self.Jr_buffer[m]
            mode_grid.Jt[z_span] += self.Jt_buffer[m]
            mode_grid.Jz[z_span] += self.Jz_buffer[m]

Пример #2

0

Показать файл

Файл: particles.py Проект: hemenhosseini/fbpic

    def sort_particles(self, fld):
        """
        Sort the particles, through the following sequence of operations:
        1. Compute the field cell index of each particle
        2. Sort the cell indices
        3. Reset the prefix sum and recompute it (parallel prefix sum)
        4. Rearrange the particle arrays

        Parameter
        ----------
        fld : a Field object
             Gives access (through `interp`) to the list of
             InterpolationGrid objects; its `prefix_sum_shift`
             attribute is reset to 0 here.
        """
        # The grid parameters are all read from the first grid of the list
        mode0 = fld.interp[0]

        # Launch configurations: one for per-particle kernels,
        # one for kernels over the flattened (Nz*Nr) grid
        ptcl_grid, ptcl_block = cuda_tpb_bpg_1d(self.Ntot)
        cell_grid, cell_block = cuda_tpb_bpg_1d(mode0.Nz * mode0.Nr)

        # Step 1: cell index of each particle
        # (defined by iz_lower and ir_lower)
        get_cell_idx_per_particle[ptcl_grid, ptcl_block](
            self.cell_idx, self.sorted_idx,
            self.x, self.y, self.z,
            mode0.invdz, mode0.zmin, mode0.Nz,
            mode0.invdr, mode0.rmin, mode0.Nr)

        # Step 2: sort `cell_idx`, and reorder `sorted_idx` accordingly.
        # An entry of `sorted_idx` is the index, in the other particle
        # arrays, of the corresponding sorted particle.
        sort_particles_per_cell(self.cell_idx, self.sorted_idx)

        # Step 3: discard the previous prefix sum...
        fld.prefix_sum_shift = 0
        reset_prefix_sum[cell_grid, cell_block](self.prefix_sum)
        # ... and compute the new (inclusive) one
        incl_prefix_sum[ptcl_grid, ptcl_block](self.cell_idx,
                                               self.prefix_sum)

        # Step 4: reorder the particle arrays
        self.rearrange_particle_arrays()

Пример #3

0

Показать файл

Файл: particles.py Проект: hemenhosseini/fbpic

 def rearrange_particle_arrays(self):
     """
     Reorder the particle data arrays according to `self.sorted_idx`.

     Each array is written, in sorted order, into a spare buffer; the
     buffer then takes the place of the array, and the former array
     becomes the spare buffer for the next attribute.
     """
     # Launch configuration (threads per block, blocks per grid)
     launch_grid, launch_block = cuda_tpb_bpg_1d(self.Ntot)

     # --- Float particle attributes ---
     float_attrs = [(self, name) for name in
                    ('x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma')]
     if self.ionizer is not None:
         float_attrs.append((self.ionizer, 'neutral_weight'))
     for owner, name in float_attrs:
         unsorted = getattr(owner, name)
         # Fill the spare buffer with the reordered data
         write_sorting_buffer[launch_grid, launch_block](
             self.sorted_idx, unsorted, self.sorting_buffer)
         # Swap roles: the filled buffer becomes the attribute,
         # and the old array becomes the spare buffer
         setattr(owner, name, self.sorting_buffer)
         self.sorting_buffer = unsorted

     # --- Integer particle attributes ---
     int_attrs = []
     if self.tracker is not None:
         int_attrs.append((self.tracker, 'id'))
     if self.ionizer is not None:
         int_attrs.append((self.ionizer, 'ionization_level'))
     for owner, name in int_attrs:
         unsorted = getattr(owner, name)
         # Fill the spare integer buffer with the reordered data
         write_sorting_buffer[launch_grid, launch_block](
             self.sorted_idx, unsorted, self.int_sorting_buffer)
         # Swap roles, as for the float attributes
         setattr(owner, name, self.int_sorting_buffer)
         self.int_sorting_buffer = unsorted

Пример #4

0

Показать файл

    def generate_new_ids_gpu(self, i_start, i_end):
        """
        Fill the array `id` in place with newly generated unique ids,
        from index `i_start` (included) to index `i_end` (excluded)

        Parameters
        ----------
        i_start, i_end: int
            Bounds of the index range that receives new ids
        """
        n_new = i_end - i_start
        launch_grid, launch_block = cuda_tpb_bpg_1d(n_new)
        # The kernel writes into self.id directly (no copy is made)
        generate_ids_gpu[launch_grid, launch_block](
            self.id, i_start, i_end,
            self.next_attributed_id, self.id_step)
        # Advance the id counter past the ids that were just handed out
        consumed = n_new * self.id_step
        self.next_attributed_id = self.next_attributed_id + consumed

Пример #5

0

Показать файл

Файл: particles.py Проект: hemenhosseini/fbpic

    def halfpush_x(self):
        """
        Push the particles' positions by half a timestep

        The positions (x, y, z) are assumed to be, initially, either
        one half-timestep *behind* the momenta (ux, uy, uz), or at the
        same timestep as the momenta.
        """
        if not self.use_cuda:
            # CPU version
            push_x_numba(self.x, self.y, self.z,
                         self.ux, self.uy, self.uz,
                         self.inv_gamma, self.Ntot, self.dt)
            return

        # GPU version: one launch over the particles
        launch_grid, launch_block = cuda_tpb_bpg_1d(self.Ntot)
        push_x_gpu[launch_grid, launch_block](
            self.x, self.y, self.z,
            self.ux, self.uy, self.uz,
            self.inv_gamma, self.dt)
        # Moving the particles invalidates the previous sorting
        self.sorted = False

Пример #6

0

Показать файл

Файл: particles.py Проект: hemenhosseini/fbpic

    def push_p(self):
        """
        Push the particles' momenta by one timestep, with the Vay pusher
        Reference : Vay, Physics of Plasmas 15, 056701 (2008)

        The momenta (ux, uy, uz) are assumed to be, initially, one
        half-timestep *behind* the positions (x, y, z); they end up
        one half-timestep *ahead* of the positions.
        """
        # Ionizable species can have a charge that depends on the
        # macroparticle, and hence require a different push function
        # (which receives the ionization level instead of `self.q`)
        has_ionizer = self.ionizer is not None

        if self.use_cuda:
            # Launch configuration (threads per block, blocks per grid)
            launch_grid, launch_block = cuda_tpb_bpg_1d(self.Ntot)

        # Leading arguments shared by all four push functions
        momenta_and_fields = (self.ux, self.uy, self.uz, self.inv_gamma,
                              self.Ex, self.Ey, self.Ez,
                              self.Bx, self.By, self.Bz)

        if self.use_cuda and has_ionizer:
            push_p_ioniz_gpu[launch_grid, launch_block](
                *momenta_and_fields, self.m, self.Ntot, self.dt,
                self.ionizer.ionization_level)
        elif self.use_cuda:
            push_p_gpu[launch_grid, launch_block](
                *momenta_and_fields, self.q, self.m, self.Ntot, self.dt)
        elif has_ionizer:
            push_p_ioniz_numba(
                *momenta_and_fields, self.m, self.Ntot, self.dt,
                self.ionizer.ionization_level)
        else:
            push_p_numba(
                *momenta_and_fields, self.q, self.m, self.Ntot, self.dt)

Пример #7

0

Показать файл

def extract_slice_from_gpu(pref_sum_curr, N_area, species):
    """
    Extract the particles whose index lies between pref_sum_curr
    and pref_sum_curr + N_area, and return them in a dictionary.

    Parameters
    ----------
    pref_sum_curr: int
        The starting index of the extraction
    N_area: int
        The number of particles to extract.
    species: an fbpic Species object
        The species from which the data is extracted

    Returns
    -------
    particle_data : A dictionary of 1D arrays (that are on the CPU)
        Keys 'x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma'.
        'w' is the extracted weight multiplied by 1/species.q, unless
        the species has an ionizer, in which case it is the ionizer's
        `neutral_weight`. Optional keys: 'id' (species with a tracker)
        and 'charge' (species with an ionizer).
    """
    tracked = species.tracker is not None
    ionizable = species.ionizer is not None

    # Launch configuration shared by all extraction kernels
    launch_grid, launch_block = cuda_tpb_bpg_1d(N_area)

    # - General particle quantities: one row per quantity
    d_quantities = cuda.device_array((8, N_area), dtype=np.float64)
    extract_particles_from_gpu[launch_grid, launch_block](
        pref_sum_curr,
        species.x, species.y, species.z,
        species.ux, species.uy, species.uz,
        species.w, species.inv_gamma,
        d_quantities)

    # - Optional particle arrays
    if tracked:
        d_ids = cuda.device_array((N_area, ), dtype=np.uint64)
        extract_array_from_gpu[launch_grid, launch_block](
            pref_sum_curr, species.tracker.id, d_ids)
    if ionizable:
        d_charge = cuda.device_array((N_area, ), dtype=np.uint64)
        extract_array_from_gpu[launch_grid, launch_block](
            pref_sum_curr, species.ionizer.ionization_level, d_charge)
        d_weight = cuda.device_array((N_area, ), dtype=np.float64)
        extract_array_from_gpu[launch_grid, launch_block](
            pref_sum_curr, species.ionizer.neutral_weight, d_weight)

    # Bring the data back to the host; row order matches the order
    # in which the arrays were passed to the kernel above
    h_quantities = d_quantities.copy_to_host()
    row_names = ('x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma')
    particle_data = {name: h_quantities[row]
                     for row, name in enumerate(row_names)}
    particle_data['w'] = particle_data['w'] * (1. / species.q)

    if tracked:
        particle_data['id'] = d_ids.copy_to_host()
    if ionizable:
        particle_data['charge'] = d_charge.copy_to_host()
        # The neutral weight supersedes the weight computed above
        particle_data['w'] = d_weight.copy_to_host()

    return particle_data

Пример #8

0

Показать файл

Файл: particles.py Проект: hemenhosseini/fbpic

    def deposit(self, fld, fieldtype):
        """
        Deposit the particles charge or current onto the grid

        This assumes that the particle positions (and momenta in the case of J)
        are currently at the same timestep as the field that is to be deposited

        Parameter
        ----------
        fld : a Field object
             Gives access (through `interp`) to the list of
             InterpolationGrid objects that receive the deposited field.

        fieldtype : string
             Indicates which field to deposit
             Either 'J' or 'rho'

        Raises
        ------
        ValueError
            If `fieldtype` is neither 'J' nor 'rho', or (GPU path only) if
            `self.particle_shape` is not one of 'linear',
            'linear_non_atomic' or 'cubic'. Both checks are performed
            before any work is started.
        """
        # Validate the arguments first, so that an invalid call does not
        # sort the particles or allocate helper arrays before failing
        if fieldtype not in ('rho', 'J'):
            raise ValueError(
                "`fieldtype` should be either 'J' or 'rho', but is `%s`" %
                fieldtype)

        # Shortcut for the list of InterpolationGrid objects
        grid = fld.interp

        if self.use_cuda:
            shape = self.particle_shape
            if shape not in ('linear', 'linear_non_atomic', 'cubic'):
                raise ValueError(
                    "`particle_shape` should be either 'linear', "
                    "'linear_non_atomic' or 'cubic', but is `%s`" % shape)

            # Get the threads per block and the blocks per grid
            dim_grid_2d_flat, dim_block_2d_flat = cuda_tpb_bpg_1d(grid[0].Nz *
                                                                  grid[0].Nr)
            dim_grid_2d, dim_block_2d = cuda_tpb_bpg_2d(grid[0].Nz, grid[0].Nr)

            # Create the helper arrays for deposition
            # (only the 'linear_non_atomic' kernels use them)
            if shape == 'linear_non_atomic':
                d_F0, d_F1, d_F2, d_F3 = cuda_deposition_arrays(
                    grid[0].Nz, grid[0].Nr, fieldtype=fieldtype)

            # Sort the particles
            if self.sorted is False:
                self.sort_particles(fld=fld)
                # The particles are now sorted and rearranged
                self.sorted = True

            # Call the CUDA Kernel for the deposition of rho or J
            # for Mode 0 and 1 only.
            if fieldtype == 'rho':
                if shape == 'linear_non_atomic':
                    # Deposit rho in each of four directions
                    deposit_rho_gpu[dim_grid_2d_flat, dim_block_2d_flat](
                        self.x, self.y, self.z, self.w, grid[0].invdz,
                        grid[0].zmin, grid[0].Nz, grid[0].invdr, grid[0].rmin,
                        grid[0].Nr, d_F0, d_F1, d_F2, d_F3, self.cell_idx,
                        self.prefix_sum)
                    # Add the four directions together
                    add_rho[dim_grid_2d,
                            dim_block_2d](grid[0].rho, grid[1].rho, d_F0, d_F1,
                                          d_F2, d_F3)
                elif shape == 'cubic':
                    deposit_rho_gpu_cubic[dim_grid_2d_flat, dim_block_2d_flat](
                        self.x, self.y, self.z, self.w, grid[0].invdz,
                        grid[0].zmin, grid[0].Nz, grid[0].invdr, grid[0].rmin,
                        grid[0].Nr, grid[0].rho, grid[1].rho, self.cell_idx,
                        self.prefix_sum)
                else:
                    # shape == 'linear' (guaranteed by the check above)
                    deposit_rho_gpu_linear[dim_grid_2d_flat,
                                           dim_block_2d_flat](
                        self.x, self.y, self.z, self.w, grid[0].invdz,
                        grid[0].zmin, grid[0].Nz, grid[0].invdr, grid[0].rmin,
                        grid[0].Nr, grid[0].rho, grid[1].rho, self.cell_idx,
                        self.prefix_sum)
            else:
                # fieldtype == 'J' (guaranteed by the check above)
                if shape == 'linear_non_atomic':
                    # Deposit J in each of four directions
                    deposit_J_gpu[dim_grid_2d_flat, dim_block_2d_flat](
                        self.x, self.y, self.z, self.w, self.ux, self.uy,
                        self.uz, self.inv_gamma, grid[0].invdz, grid[0].zmin,
                        grid[0].Nz, grid[0].invdr, grid[0].rmin, grid[0].Nr,
                        d_F0, d_F1, d_F2, d_F3, self.cell_idx, self.prefix_sum)
                    # Add the four directions together
                    add_J[dim_grid_2d,
                          dim_block_2d](grid[0].Jr, grid[1].Jr, grid[0].Jt,
                                        grid[1].Jt, grid[0].Jz, grid[1].Jz,
                                        d_F0, d_F1, d_F2, d_F3)
                elif shape == 'cubic':
                    deposit_J_gpu_cubic[dim_grid_2d_flat, dim_block_2d_flat](
                        self.x, self.y, self.z, self.w, self.ux, self.uy,
                        self.uz, self.inv_gamma, grid[0].invdz, grid[0].zmin,
                        grid[0].Nz, grid[0].invdr, grid[0].rmin, grid[0].Nr,
                        grid[0].Jr, grid[1].Jr, grid[0].Jt, grid[1].Jt,
                        grid[0].Jz, grid[1].Jz, self.cell_idx, self.prefix_sum)
                else:
                    # shape == 'linear' (guaranteed by the check above)
                    deposit_J_gpu_linear[dim_grid_2d_flat, dim_block_2d_flat](
                        self.x, self.y, self.z, self.w, self.ux, self.uy,
                        self.uz, self.inv_gamma, grid[0].invdz, grid[0].zmin,
                        grid[0].Nz, grid[0].invdr, grid[0].rmin, grid[0].Nr,
                        grid[0].Jr, grid[1].Jr, grid[0].Jt, grid[1].Jt,
                        grid[0].Jz, grid[1].Jz, self.cell_idx, self.prefix_sum)

        # CPU version
        else:
            # Preliminary arrays for the cylindrical conversion
            r = np.sqrt(self.x**2 + self.y**2)
            # Avoid division by 0.
            invr = 1. / np.where(r != 0., r, 1.)
            cos = np.where(r != 0., self.x * invr, 1.)
            sin = np.where(r != 0., self.y * invr, 0.)

            # Indices and weights
            # (any shape other than 'cubic' is treated as order 1 here)
            if self.particle_shape == 'cubic':
                shape_order = 3
            else:
                shape_order = 1
            iz, Sz = weights(self.z,
                             grid[0].invdz,
                             grid[0].zmin,
                             grid[0].Nz,
                             direction='z',
                             shape_order=shape_order)
            ir, Sr = weights(r,
                             grid[0].invdr,
                             grid[0].rmin,
                             grid[0].Nr,
                             direction='r',
                             shape_order=shape_order)

            # Number of modes considered :
            # number of elements in the grid list
            Nm = len(grid)

            if fieldtype == 'rho':
                # ---------------------------------------
                # Deposit the charge density mode by mode
                # ---------------------------------------
                # Prepare auxiliary matrix
                exptheta = np.ones(self.Ntot, dtype='complex')
                # exptheta takes the value exp(im theta) throughout the loop
                for m in range(Nm):
                    # Increment exptheta (notice the + : forward transform)
                    if m == 1:
                        exptheta[:].real = cos
                        exptheta[:].imag = sin
                    elif m > 1:
                        exptheta[:] = exptheta * (cos + 1.j * sin)
                    # Deposit the fields
                    # (The sign -1 with which the guards are added is not
                    # trivial to derive but avoids artifacts on the axis)
                    deposit_field_numba(self.w * exptheta, grid[m].rho, iz, ir,
                                        Sz, Sr, -1.)

            else:
                # fieldtype == 'J' (guaranteed by the check above)
                # ----------------------------------------
                # Deposit the current density mode by mode
                # ----------------------------------------
                # Calculate the currents
                Jr = self.w * c * self.inv_gamma * (cos * self.ux +
                                                    sin * self.uy)
                Jt = self.w * c * self.inv_gamma * (cos * self.uy -
                                                    sin * self.ux)
                Jz = self.w * c * self.inv_gamma * self.uz
                # Prepare auxiliary matrix
                exptheta = np.ones(self.Ntot, dtype='complex')
                # exptheta takes the value exp(im theta) throughout the loop
                for m in range(Nm):
                    # Increment exptheta (notice the + : forward transform)
                    if m == 1:
                        exptheta[:].real = cos
                        exptheta[:].imag = sin
                    elif m > 1:
                        exptheta[:] = exptheta * (cos + 1.j * sin)
                    # Deposit the fields
                    # (The sign -1 with which the guards are added is not
                    # trivial to derive but avoids artifacts on the axis)
                    deposit_field_numba(Jr * exptheta, grid[m].Jr, iz, ir, Sz,
                                        Sr, -1.)
                    deposit_field_numba(Jt * exptheta, grid[m].Jt, iz, ir, Sz,
                                        Sr, -1.)
                    deposit_field_numba(Jz * exptheta, grid[m].Jz, iz, ir, Sz,
                                        Sr, -1.)

Пример #9

0

Показать файл

Файл: particles.py Проект: hemenhosseini/fbpic

    def gather(self, grid):
        """
        Gather the fields onto the macroparticles

        This assumes that the particle positions are currently at
        the same timestep as the field that is to be gathered.

        The results are written into the particle arrays
        Ex, Ey, Ez, Bx, By, Bz of this object.

        Parameter
        ----------
        grid : a list of InterpolationGrid objects
             (one InterpolationGrid object per azimuthal mode)
             Contains the field values on the interpolation grid
        """
        # Plain truthiness test, as in the other methods that
        # branch on `use_cuda`
        if self.use_cuda:
            # Get the threads per block and the blocks per grid
            dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(self.Ntot)
            # Call the CUDA Kernel for the gathering of E and B Fields
            # for Mode 0 and 1 only.
            if self.particle_shape == 'cubic':
                gather_field_gpu_cubic[dim_grid_1d, dim_block_1d](
                    self.x, self.y, self.z, grid[0].invdz, grid[0].zmin,
                    grid[0].Nz, grid[0].invdr, grid[0].rmin, grid[0].Nr,
                    grid[0].Er, grid[0].Et, grid[0].Ez, grid[1].Er, grid[1].Et,
                    grid[1].Ez, grid[0].Br, grid[0].Bt, grid[0].Bz, grid[1].Br,
                    grid[1].Bt, grid[1].Bz, self.Ex, self.Ey, self.Ez, self.Bx,
                    self.By, self.Bz)
            else:
                # Any shape other than 'cubic' uses the linear kernel
                gather_field_gpu_linear[dim_grid_1d, dim_block_1d](
                    self.x, self.y, self.z, grid[0].invdz, grid[0].zmin,
                    grid[0].Nz, grid[0].invdr, grid[0].rmin, grid[0].Nr,
                    grid[0].Er, grid[0].Et, grid[0].Ez, grid[1].Er, grid[1].Et,
                    grid[1].Ez, grid[0].Br, grid[0].Bt, grid[0].Bz, grid[1].Br,
                    grid[1].Bt, grid[1].Bz, self.Ex, self.Ey, self.Ez, self.Bx,
                    self.By, self.Bz)
        else:
            # Preliminary arrays for the cylindrical conversion
            r = np.sqrt(self.x**2 + self.y**2)
            # Avoid division by 0.
            invr = 1. / np.where(r != 0., r, 1.)
            cos = np.where(r != 0., self.x * invr, 1.)
            sin = np.where(r != 0., self.y * invr, 0.)

            # Indices and weights
            # (any shape other than 'cubic' is treated as order 1)
            if self.particle_shape == 'cubic':
                shape_order = 3
            else:
                shape_order = 1
            iz, Sz = weights(self.z,
                             grid[0].invdz,
                             grid[0].zmin,
                             grid[0].Nz,
                             direction='z',
                             shape_order=shape_order)
            ir, Sr = weights(r,
                             grid[0].invdr,
                             grid[0].rmin,
                             grid[0].Nr,
                             direction='r',
                             shape_order=shape_order)

            # Number of modes considered :
            # number of elements in the grid list
            Nm = len(grid)

            # -------------------------------
            # Gather the E field mode by mode
            # -------------------------------
            # Zero the previous fields
            self.Ex[:] = 0.
            self.Ey[:] = 0.
            self.Ez[:] = 0.
            # Prepare auxiliary matrices
            # (Fr and Ft hold the radial and azimuthal components, which
            # are converted to Cartesian components after the loop)
            Ft = np.zeros(self.Ntot)
            Fr = np.zeros(self.Ntot)
            exptheta = np.ones(self.Ntot, dtype='complex')
            # exptheta takes the value exp(-im theta) throughout the loop
            for m in range(Nm):
                # Increment exptheta (notice the - : backward transform)
                if m == 1:
                    exptheta[:].real = cos
                    exptheta[:].imag = -sin
                elif m > 1:
                    exptheta[:] = exptheta * (cos - 1.j * sin)
                # Gather the fields
                # (The sign with which the guards are added
                # depends on whether the fields should be zero on axis)
                gather_field_numba(exptheta, m, grid[m].Er, Fr, iz, ir, Sz, Sr,
                                   -((-1.)**m))
                gather_field_numba(exptheta, m, grid[m].Et, Ft, iz, ir, Sz, Sr,
                                   -((-1.)**m))
                gather_field_numba(exptheta, m, grid[m].Ez, self.Ez, iz, ir,
                                   Sz, Sr, (-1.)**m)

            # Convert to Cartesian coordinates
            self.Ex[:] = cos * Fr - sin * Ft
            self.Ey[:] = sin * Fr + cos * Ft

            # -------------------------------
            # Gather the B field mode by mode
            # -------------------------------
            # Zero the previous fields
            self.Bx[:] = 0.
            self.By[:] = 0.
            self.Bz[:] = 0.
            # Reset the auxiliary matrices, which are reused from the E field
            Ft[:] = 0.
            Fr[:] = 0.
            exptheta[:] = 1.
            # exptheta takes the value exp(-im theta) throughout the loop
            for m in range(Nm):
                # Increment exptheta (notice the - : backward transform)
                if m == 1:
                    exptheta[:].real = cos
                    exptheta[:].imag = -sin
                elif m > 1:
                    exptheta[:] = exptheta * (cos - 1.j * sin)
                # Gather the fields
                # (The sign with which the guards are added
                # depends on whether the fields should be zero on axis)
                gather_field_numba(exptheta, m, grid[m].Br, Fr, iz, ir, Sz, Sr,
                                   -((-1.)**m))
                gather_field_numba(exptheta, m, grid[m].Bt, Ft, iz, ir, Sz, Sr,
                                   -((-1.)**m))
                gather_field_numba(exptheta, m, grid[m].Bz, self.Bz, iz, ir,
                                   Sz, Sr, (-1.)**m)

            # Convert to Cartesian coordinates
            self.Bx[:] = cos * Fr - sin * Ft
            self.By[:] = sin * Fr + cos * Ft

Пример #10

0

Показать файл

Файл: ionizer.py Проект: hemenhosseini/fbpic

    def handle_ionization_gpu(self, ion):
        """
        Handle ionization on the GPU:
        - For each ion macroparticle, decide whether it is going to
          be further ionized during this timestep, based on the ADK rate.
        - Append the electrons created by ionization to `target_species`
          (whose particle arrays are reallocated to the new size)

        Parameters:
        -----------
        ion: an fbpic.Particles object
            The ionizable species, from which new electrons are created.
        """
        # The particles are processed in batches
        # (of typically 10, 20 particles)
        n_batches = int(ion.Ntot / self.batch_size) + 1

        # Temporary device arrays: one flag per ion, one counter per batch
        ionized_flags = cuda.device_array((ion.Ntot, ), dtype=np.int16)
        batch_counts = cuda.device_array((n_batches, ), dtype=np.int64)
        # One random number per ion
        draws = cuda.device_array((ion.Ntot, ), dtype=np.float32)
        self.prng.uniform(draws)

        # Ionize the ions (one thread per batch)
        batch_grid, batch_block = cuda_tpb_bpg_1d(n_batches)
        ionize_ions_cuda[batch_grid, batch_block](
            n_batches, self.batch_size, ion.Ntot, self.level_max,
            batch_counts, ionized_flags, self.ionization_level, draws,
            self.adk_prefactor, self.adk_power, self.adk_exp_prefactor,
            ion.ux, ion.uy, ion.uz,
            ion.Ex, ion.Ey, ion.Ez, ion.Bx, ion.By, ion.Bz,
            ion.w, self.neutral_weight)

        # Cumulated number of new electrons, with a leading 0
        # (computed on the CPU, as this is typically difficult on the GPU)
        batch_counts = batch_counts.copy_to_host()
        cumulated = np.zeros(len(batch_counts) + 1, dtype=np.int64)
        np.cumsum(batch_counts, out=cumulated[1:])
        n_created = cumulated[-1]
        # Nothing more to do when no electron was created
        if n_created == 0:
            return

        # Grow the electron species, in order to
        # accommodate the electrons produced by ionization
        elec = self.target_species
        old_Ntot = elec.Ntot
        new_Ntot = old_Ntot + n_created

        # Copy the existing electrons into larger arrays
        # (one thread per particle)
        copy_grid, copy_block = cuda_tpb_bpg_1d(old_Ntot)
        float_names = ('x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma',
                       'Ex', 'Ey', 'Ez', 'Bx', 'By', 'Bz')
        for name in float_names:
            enlarged = cuda.device_array((new_Ntot, ), dtype=np.float64)
            copy_particle_data_cuda[copy_grid, copy_block](
                old_Ntot, getattr(elec, name), enlarged)
            setattr(elec, name, enlarged)
        if elec.tracker is not None:
            enlarged = cuda.device_array((new_Ntot, ), dtype=np.uint64)
            copy_particle_data_cuda[copy_grid, copy_block](
                old_Ntot, elec.tracker.id, enlarged)
            elec.tracker.id = enlarged

        # Fresh auxiliary arrays, at the new size
        elec.cell_idx = cuda.device_array((new_Ntot, ), dtype=np.int32)
        elec.sorted_idx = cuda.device_array((new_Ntot, ), dtype=np.uint32)
        elec.sorting_buffer = cuda.device_array((new_Ntot, ), dtype=np.float64)
        if elec.n_integer_quantities > 0:
            elec.int_sorting_buffer = cuda.device_array((new_Ntot, ),
                                                        dtype=np.uint64)
        # Register the new number of electrons
        elec.Ntot = new_Ntot

        # The cumulated counts are needed on the GPU by the next kernel
        cumulated = cuda.to_device(cumulated)
        # Fill in the electrons from ionization (one thread per batch)
        copy_ionized_electrons_cuda[batch_grid, batch_block](
            n_batches, self.batch_size, old_Ntot, ion.Ntot, cumulated,
            ionized_flags,
            elec.x, elec.y, elec.z, elec.inv_gamma,
            elec.ux, elec.uy, elec.uz, elec.w,
            elec.Ex, elec.Ey, elec.Ez, elec.Bx, elec.By, elec.Bz,
            ion.x, ion.y, ion.z, ion.inv_gamma,
            ion.ux, ion.uy, ion.uz, self.neutral_weight,
            ion.Ex, ion.Ey, ion.Ez, ion.Bx, ion.By, ion.Bz)
        # The electron arrays are marked as unsorted after the additions
        elec.sorted = False

        # Tracked electrons: give ids to the newly added ones
        if elec.tracker is not None:
            elec.tracker.generate_new_ids_gpu(old_Ntot, new_Ntot)

Пример #11

0

Показать файл

Файл: boosted_field_diag.py Проект: hemenhosseini/fbpic

    def extract_slice(self, fld, comm, z_boost, zmin_boost, slice_array):
        """
        Fill `slice_array` with a slice of the fields at z_boost
        (the fields are still expressed in the boosted frame; for
        performance, the Lorentz transform of the field values is
        performed only when flushing to disk)

        Parameters
        ----------
        fld: a Fields object
            The object from which to extract the fields

        comm: a BoundaryCommunicator object, or None
            Contains information about the guard cells in particular.
            When it is None, no guard-cell offset is added to the index.

        z_boost: float (meters)
            Position of the slice in the boosted frame

        zmin_boost: float (meters)
            Position of the left end of the physical part of the local
            subdomain (i.e. excludes guard cells)

        slice_array: either a numpy array or a cuda device array
            Pre-allocated array of reals that packs together the slices
            of the different fields. It is handed to the CPU routine when
            `fld.use_cuda` is False, and to the CUDA kernel otherwise, so
            it has to live on the matching device.
            The first index of this array corresponds to the field type
            (10 different field types), and the correspondence between
            the field type and integer index is given by field_to_index.
            The shape of this array is (10, 2*Nm-1, Nr)

        Returns
        -------
        None: the extracted slice is written into `slice_array`.
        """
        # Position of the slice along z, counted in cells from zmin_boost,
        # on the grid shifted by half a cell
        dz = fld.interp[0].dz
        z_staggered_gridunits = (z_boost - zmin_boost - 0.5 * dz) / dz
        # Cell index of the slice and the corresponding interpolation
        # shape factor (note: int() truncates toward zero)
        iz = int(z_staggered_gridunits)
        Sz = iz + 1 - z_staggered_gridunits
        # Shift the index by the number of guard cells, if any
        if comm is not None:
            iz += comm.n_guard

        # CPU case: interpolate the fields *in the boosted frame* at
        # z_boost and store them in the pre-allocated CPU array
        if fld.use_cuda is False:
            self.extract_slice_cpu(fld, iz, Sz, slice_array)
            return

        # GPU case: fill the pre-allocated GPU array slice_array
        # Prepare the kernel call (launch dimensions derived from Nr)
        interp = fld.interp
        Nr = fld.Nr
        dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(Nr)

        # The kernel writes into `slice_array` directly. Its launch does
        # not hand back a result, so the return value is deliberately not
        # bound to `slice_array`.
        extract_slice_cuda[dim_grid_1d, dim_block_1d](
            Nr, iz, Sz, slice_array, interp[0].Er, interp[0].Et,
            interp[0].Ez, interp[0].Br, interp[0].Bt, interp[0].Bz,
            interp[0].Jr, interp[0].Jt, interp[0].Jz, interp[0].rho,
            interp[1].Er, interp[1].Et, interp[1].Ez, interp[1].Br,
            interp[1].Bt, interp[1].Bz, interp[1].Jr, interp[1].Jt,
            interp[1].Jz, interp[1].rho)

Python cuda_tpb_bpg_1d примеры использования