Example #1
    def damp_EB_open_boundary( self, interp ):
        """
        Damp the fields E and B in the damp cells, at the right and left
        of the *global* simulation box.

        Parameter
        ---------
        interp: list of InterpolationGrid objects (one per azimuthal mode)
            Objects that contain the fields to be damped.
        """
        # Do not damp the fields if n_damp is 0 (periodic boundaries)
        if self.n_damp != 0:
            # Total size of the damping and guard region
            nd = self.n_guard + self.n_damp + self.n_inject

            if self.left_proc is None:
                # Damp the fields on the CPU or the GPU
                if interp[0].use_cuda:
                    # Damp the fields on the GPU
                    dim_grid, dim_block = cuda_tpb_bpg_2d(
                        nd, interp[0].Nr )
                    for m in range(len(interp)):
                        cuda_damp_EB_left[dim_grid, dim_block](
                            interp[m].Er, interp[m].Et, interp[m].Ez,
                            interp[m].Br, interp[m].Bt, interp[m].Bz,
                            self.d_left_damp, nd)
                else:
                    # Damp the fields on the CPU
                    for m in range(len(interp)):
                        # Damp the fields in left guard cells
                        interp[m].Er[:nd,:]*=self.left_damp[:,np.newaxis]
                        interp[m].Et[:nd,:]*=self.left_damp[:,np.newaxis]
                        interp[m].Ez[:nd,:]*=self.left_damp[:,np.newaxis]
                        interp[m].Br[:nd,:]*=self.left_damp[:,np.newaxis]
                        interp[m].Bt[:nd,:]*=self.left_damp[:,np.newaxis]
                        interp[m].Bz[:nd,:]*=self.left_damp[:,np.newaxis]

            if self.right_proc is None:
                # Damp the fields on the CPU or the GPU
                if interp[0].use_cuda:
                    # Damp the fields on the GPU
                    dim_grid, dim_block = cuda_tpb_bpg_2d(
                        nd, interp[0].Nr )
                    for m in range(len(interp)):
                        cuda_damp_EB_right[dim_grid, dim_block](
                            interp[m].Er, interp[m].Et, interp[m].Ez,
                            interp[m].Br, interp[m].Bt, interp[m].Bz,
                            self.d_right_damp, nd)
                else:
                    # Damp the fields on the CPU
                    for m in range(len(interp)):
                        # Damp the fields in the right guard cells
                        interp[m].Er[-nd:,:]*=self.right_damp[::-1,np.newaxis]
                        interp[m].Et[-nd:,:]*=self.right_damp[::-1,np.newaxis]
                        interp[m].Ez[-nd:,:]*=self.right_damp[::-1,np.newaxis]
                        interp[m].Br[-nd:,:]*=self.right_damp[::-1,np.newaxis]
                        interp[m].Bt[-nd:,:]*=self.right_damp[::-1,np.newaxis]
                        interp[m].Bz[-nd:,:]*=self.right_damp[::-1,np.newaxis]
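
For context, a plausible shape for the left_damp profile used above (built once at initialization) is: zeros over the guard cells, a smooth ramp over the n_damp cells, and ones over the injection cells. The sketch below is only a guess at that structure; make_left_damp is a hypothetical helper and the exact profile used by the code may differ.

    import numpy as np

    def make_left_damp(n_guard, n_damp, n_inject):
        # Guard cells: fields fully suppressed
        guard = np.zeros(n_guard)
        # Damping region: smooth sin^2 ramp from 0 up to 1
        ramp = np.sin(0.5 * np.pi * np.arange(n_damp) / n_damp)**2
        # Injection cells: fields left untouched
        inject = np.ones(n_inject)
        return np.concatenate([guard, ramp, inject])

    # The profile is applied column-wise, as in the CPU branch above:
    #     interp[m].Er[:nd,:] *= left_damp[:, np.newaxis]
    left_damp = make_left_damp(n_guard=64, n_damp=64, n_inject=16)
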
Example #2
    def damp_pml_EB( self, interp ):
        """
        Damp the fields E and B in the PML cells.

        Parameters
        ----------
        interp: list of InterpolationGrid objects (one per azimuthal mode)
            Objects that contain the fields to be damped.
        """
        # Damp the fields on the CPU or the GPU
        if interp[0].use_cuda:
            # Damp the fields on the GPU
            dim_grid, dim_block = cuda_tpb_bpg_2d( interp[0].Nz, self.n_pml )
            for m in range(len(interp)):
                cuda_damp_pml_EB[dim_grid, dim_block](
                    interp[m].Et, interp[m].Et_pml, interp[m].Ez,
                    interp[m].Bt, interp[m].Bt_pml, interp[m].Bz,
                    self.d_damp_array, self.n_pml )

        else:
            # Damp the fields on the CPU
            n_pml = self.n_pml
            for m in range(len(interp)):
                # Subtract the theta PML fields from the regular theta fields
                interp[m].Et[:,-n_pml:] -= interp[m].Et_pml[:,-n_pml:]
                interp[m].Bt[:,-n_pml:] -= interp[m].Bt_pml[:,-n_pml:]
                # Damp the theta PML fields
                interp[m].Et_pml[:,-n_pml:] *= self.damp_array[np.newaxis, :]
                interp[m].Bt_pml[:,-n_pml:] *= self.damp_array[np.newaxis, :]
                # Add the theta PML fields back to the regular theta fields
                interp[m].Et[:,-n_pml:] += interp[m].Et_pml[:,-n_pml:]
                interp[m].Bt[:,-n_pml:] += interp[m].Bt_pml[:,-n_pml:]
                # Damp the z fields
                interp[m].Bz[:,-n_pml:] *= self.damp_array[np.newaxis, :]
                interp[m].Ez[:,-n_pml:] *= self.damp_array[np.newaxis, :]
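
As a quick illustration of the subtract/damp/add sequence in the CPU branch: only the split (PML) part of the theta components is attenuated, while the remainder is left untouched. A toy 1D example with made-up arrays:

    import numpy as np

    damp = np.linspace(1.0, 0.0, 8)      # stand-in for damp_array
    Et = np.ones(8)                      # regular theta field
    Et_pml = 0.3 * np.ones(8)            # split (PML) component

    Et -= Et_pml        # remove the split component from the total
    Et_pml *= damp      # damp only the split component
    Et += Et_pml        # add the damped component back
    # Et is now 0.7 + 0.3*damp: the non-PML part is unchanged
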
Example #3
    def shift_spect_grid( self, grid, n_move,
                          shift_rho=True, shift_currents=True ):
        """
        Shift the spectral fields by n_move cells (with respect to the
        spatial grid). Shifting is done either on the CPU or the GPU,
        if use_cuda is True. (Typically n_move is positive, and the
        fields are shifted backwards)

        Parameters
        ----------
        grid: a SpectralGrid corresponding to one given azimuthal mode
            Contains the values of the fields in spectral space,
            and is modified by this function.

        n_move: int
            The number of cells by which the grid should be shifted

        shift_rho: bool, optional
            Whether to also shift the charge density
            Default: True, since rho is only recalculated from
            scratch when the particles are exchanged

        shift_currents: bool, optional
            Whether to also shift the currents
            Default: True (note that the currents are recalculated
            from scratch at each PIC cycle anyway)
        """
        if grid.use_cuda:
            shift = grid.d_field_shift
            # Get a 2D CUDA grid of the size of the grid
            tpb, bpg = cuda_tpb_bpg_2d( grid.Ep.shape[0], grid.Ep.shape[1] )
            # Shift all the fields on the GPU
            shift_spect_array_gpu[tpb, bpg]( grid.Ep, shift, n_move )
            shift_spect_array_gpu[tpb, bpg]( grid.Em, shift, n_move )
            shift_spect_array_gpu[tpb, bpg]( grid.Ez, shift, n_move )
            shift_spect_array_gpu[tpb, bpg]( grid.Bp, shift, n_move )
            shift_spect_array_gpu[tpb, bpg]( grid.Bm, shift, n_move )
            shift_spect_array_gpu[tpb, bpg]( grid.Bz, shift, n_move )
            if shift_rho:
                shift_spect_array_gpu[tpb, bpg]( grid.rho_prev, shift, n_move )
            if shift_currents:
                shift_spect_array_gpu[tpb, bpg]( grid.Jp, shift, n_move )
                shift_spect_array_gpu[tpb, bpg]( grid.Jm, shift, n_move )
                shift_spect_array_gpu[tpb, bpg]( grid.Jz, shift, n_move )
        else:
            shift = grid.field_shift
            # Shift all the fields on the CPU
            shift_spect_array_cpu( grid.Ep, shift, n_move )
            shift_spect_array_cpu( grid.Em, shift, n_move )
            shift_spect_array_cpu( grid.Ez, shift, n_move )
            shift_spect_array_cpu( grid.Bp, shift, n_move )
            shift_spect_array_cpu( grid.Bm, shift, n_move )
            shift_spect_array_cpu( grid.Bz, shift, n_move )
            if shift_rho:
                shift_spect_array_cpu( grid.rho_prev, shift, n_move )
            if shift_currents:
                shift_spect_array_cpu( grid.Jp, shift, n_move )
                shift_spect_array_cpu( grid.Jm, shift, n_move )
                shift_spect_array_cpu( grid.Jz, shift, n_move )
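
A minimal NumPy sketch of what such a spectral shift can look like, assuming field_shift holds the per-kz phase factor exp(i*kz*dz) (the function name is illustrative; the library's actual CPU/GPU kernels may differ in detail):

    import numpy as np

    def shift_spect_array_sketch(field, field_shift, n_move):
        # field: (Nz, Nr) complex array; field_shift: (Nz,) complex phases
        # Shifting by n_move cells multiplies each kz mode by the
        # phase factor raised to the power n_move
        field *= (field_shift**n_move)[:, np.newaxis]
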
Example #4
    def erase(self, fieldtype):
        """
        Sets the field `fieldtype` to zero on the interpolation grid

        Parameter
        ---------
        fieldtype : string
            A string which represents the kind of field to be erased
            (either 'E', 'B', 'J' or 'rho')
        """
        if self.use_cuda:
            # Obtain the cuda grid
            dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr)

            # Erase the arrays on the GPU
            if fieldtype == 'rho':
                for m in range(self.Nm):
                    cuda_erase_scalar[dim_grid, dim_block](self.interp[m].rho)
            elif fieldtype == 'J':
                for m in range(self.Nm):
                    cuda_erase_vector[dim_grid, dim_block](self.interp[m].Jr,
                                                           self.interp[m].Jt,
                                                           self.interp[m].Jz)
            elif fieldtype == 'E':
                for m in range(self.Nm):
                    cuda_erase_vector[dim_grid, dim_block](self.interp[m].Er,
                                                           self.interp[m].Et,
                                                           self.interp[m].Ez)
            elif fieldtype == 'B':
                for m in range(self.Nm):
                    cuda_erase_vector[dim_grid, dim_block](self.interp[m].Br,
                                                           self.interp[m].Bt,
                                                           self.interp[m].Bz)
            else:
                raise ValueError('Invalid string for fieldtype: %s' %
                                 fieldtype)
        else:
            # Erase the arrays on the CPU
            if fieldtype == 'rho':
                for m in range(self.Nm):
                    self.interp[m].rho[:, :] = 0.
            elif fieldtype == 'J':
                for m in range(self.Nm):
                    self.interp[m].Jr[:, :] = 0.
                    self.interp[m].Jt[:, :] = 0.
                    self.interp[m].Jz[:, :] = 0.
            elif fieldtype == 'E':
                for m in range(self.Nm):
                    self.interp[m].Er[:, :] = 0.
                    self.interp[m].Et[:, :] = 0.
                    self.interp[m].Ez[:, :] = 0.
            elif fieldtype == 'B':
                for m in range(self.Nm):
                    self.interp[m].Br[:, :] = 0.
                    self.interp[m].Bt[:, :] = 0.
                    self.interp[m].Bz[:, :] = 0.
            else:
                raise ValueError('Invalid string for fieldtype: %s' %
                                 fieldtype)
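
A hypothetical usage pattern for this method, with fld standing for an object that also exposes divide_by_volume (see Example #7 below): the grids are zeroed before deposition and normalized afterwards.

    def deposit_step(fld):
        # fld: hypothetical Fields-like object exposing erase() and
        # divide_by_volume()
        fld.erase('rho')
        fld.erase('J')
        # ... deposit charge and current from the particles ...
        fld.divide_by_volume('rho')
        fld.divide_by_volume('J')
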
Example #5
    def filter(self, fieldtype) :
        """
        Filter the field `fieldtype`

        Parameter
        ---------
        fieldtype : string
            A string which represents the kind of field to be filtered
            (either 'E', 'B', 'J', 'rho_next' or 'rho_prev')
        """
        if self.use_cuda :
            # Obtain the cuda grid
            dim_grid, dim_block = cuda_tpb_bpg_2d( self.Nz, self.Nr )
            # Filter fields on the GPU
            if fieldtype == 'J' :
                cuda_filter_vector[dim_grid, dim_block](
                        self.Jp, self.Jm, self.Jz, self.Nz, self.Nr,
                        self.d_filter_array_z, self.d_filter_array_r )
            elif fieldtype == 'E' :
                cuda_filter_vector[dim_grid, dim_block](
                        self.Ep, self.Em, self.Ez, self.Nz, self.Nr,
                        self.d_filter_array_z, self.d_filter_array_r )
            elif fieldtype == 'B' :
                cuda_filter_vector[dim_grid, dim_block](
                        self.Bp, self.Bm, self.Bz, self.Nz, self.Nr,
                        self.d_filter_array_z, self.d_filter_array_r )
            elif fieldtype in ['rho_prev', 'rho_next',
                                'rho_next_z', 'rho_next_xy']:
                spectral_rho = getattr( self, fieldtype )
                cuda_filter_scalar[dim_grid, dim_block](
                        spectral_rho, self.Nz, self.Nr,
                        self.d_filter_array_z, self.d_filter_array_r )
            else :
                raise ValueError('Invalid string for fieldtype: %s'%fieldtype)
        else :
            # Filter fields on the CPU
            if fieldtype == 'J' :
                numba_filter_vector(
                        self.Jp, self.Jm, self.Jz, self.Nz, self.Nr,
                        self.filter_array_z, self.filter_array_r )
            elif fieldtype == 'E' :
                numba_filter_vector(
                        self.Ep, self.Em, self.Ez, self.Nz, self.Nr,
                        self.filter_array_z, self.filter_array_r )
            elif fieldtype == 'B' :
                numba_filter_vector(
                        self.Bp, self.Bm, self.Bz, self.Nz, self.Nr,
                        self.filter_array_z, self.filter_array_r )
            elif fieldtype in ['rho_prev', 'rho_next',
                                'rho_next_z', 'rho_next_xy']:
                spectral_rho = getattr( self, fieldtype )
                numba_filter_scalar(
                        spectral_rho, self.Nz, self.Nr,
                        self.filter_array_z, self.filter_array_r )
            else :
                raise ValueError('Invalid string for fieldtype: %s'%fieldtype)
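
For reference, one common choice for such a spectral filter array is the transfer function of the three-point (1/4, 1/2, 1/4) binomial smoother, cos^2(k*delta/2), built once for the z axis and once for the r axis. This is only an assumption about the filter shape; the library may use a different profile.

    import numpy as np

    def binomial_filter_array(k, delta):
        # Transfer function of the (1/4, 1/2, 1/4) binomial stencil
        return np.cos(0.5 * k * delta)**2

    # e.g. filter_array_z = binomial_filter_array(kz, dz)   (illustrative)
    #      filter_array_r = binomial_filter_array(kr, dr)
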
Example #6
    def push_rho(self):
        """
        Transfer the values of rho_next to rho_prev,
        and set rho_next to zero
        """
        if self.use_cuda:
            # Obtain the cuda grid
            dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr)
            # Push the fields on the GPU
            cuda_push_rho[dim_grid, dim_block](self.rho_prev, self.rho_next,
                                               self.Nz, self.Nr)
        else:
            # Push the fields on the CPU
            self.rho_prev[:, :] = self.rho_next[:, :]
            self.rho_next[:, :] = 0.
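
A sketch of what a GPU kernel with the call signature used above could look like, written with numba.cuda (the kernel name is illustrative and the arrays are assumed to be complex device arrays; the library's own kernel may differ):

    from numba import cuda

    @cuda.jit
    def push_rho_kernel(rho_prev, rho_next, Nz, Nr):
        # One thread per (iz, ir) cell of the spectral grid
        iz, ir = cuda.grid(2)
        if iz < Nz and ir < Nr:
            rho_prev[iz, ir] = rho_next[iz, ir]
            rho_next[iz, ir] = 0j
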
Example #7
    def divide_by_volume(self, fieldtype):
        """
        Divide the field `fieldtype` in each cell by the cell volume,
        on the interpolation grid.

        This is typically done for rho and J, after the charge and
        current deposition.

        Parameter
        ---------
        fieldtype : string
            A string which represents the kind of field to be divided by
            the volume (either 'rho' or 'J')
        """
        if self.use_cuda:
            # Perform division on the GPU
            dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr)

            if fieldtype == 'rho':
                for m in range(self.Nm):
                    cuda_divide_scalar_by_volume[dim_grid, dim_block](
                        self.interp[m].rho, self.interp[m].d_invvol)
            elif fieldtype == 'J':
                for m in range(self.Nm):
                    cuda_divide_vector_by_volume[dim_grid, dim_block](
                        self.interp[m].Jr, self.interp[m].Jt,
                        self.interp[m].Jz, self.interp[m].d_invvol)
            else:
                raise ValueError('Invalid string for fieldtype: %s' %
                                 fieldtype)
        else:
            # Perform division on the CPU
            if fieldtype == 'rho':
                for m in range(self.Nm):
                    self.interp[m].rho = \
                    self.interp[m].rho * self.interp[m].invvol[np.newaxis,:]
            elif fieldtype == 'J':
                for m in range(self.Nm):
                    self.interp[m].Jr = \
                    self.interp[m].Jr * self.interp[m].invvol[np.newaxis,:]
                    self.interp[m].Jt = \
                    self.interp[m].Jt * self.interp[m].invvol[np.newaxis,:]
                    self.interp[m].Jz = \
                    self.interp[m].Jz * self.interp[m].invvol[np.newaxis,:]
            else:
                raise ValueError('Invalid string for fieldtype: %s' %
                                 fieldtype)
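
For context, invvol is indexed along r only because the cell volume of a cylindrical grid grows with radius. A sketch of one common definition, assuming ring-shaped cells of width dr and length dz centred at (i + 1/2)*dr (the code's exact definition may differ):

    import numpy as np

    def make_invvol(Nr, dr, dz):
        r = (np.arange(Nr) + 0.5) * dr      # radial cell centres
        vol = 2 * np.pi * r * dr * dz       # ring volume of each cell
        return 1. / vol
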
Example #8
    def __init__(self, Nz, Nr, m, rmax, use_cuda=False):
        """
        Initializes the dht and fft attributes, which contain auxiliary
        matrices allowing to transform the fields quickly

        Parameters
        ----------
        Nz, Nr : int
            Number of points along z and r respectively

        m : int
            Index of the mode (needed for the Hankel transform)

        rmax : float
            The size of the simulation box along r.

        use_cuda : bool, optional
            Whether to perform the transforms on the GPU
        """
        # Check whether to use the GPU
        self.use_cuda = use_cuda
        if (self.use_cuda is True) and (cuda_installed is False):
            self.use_cuda = False
        if self.use_cuda:
            # Initialize the dimension of the grid and blocks
            self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr, 1, 32)

        # Initialize the DHT (local implementation, see hankel.py)
        self.dht0 = DHT(m, m, Nr, Nz, rmax, use_cuda=self.use_cuda)
        self.dhtp = DHT(m + 1, m, Nr, Nz, rmax, use_cuda=self.use_cuda)
        self.dhtm = DHT(m - 1, m, Nr, Nz, rmax, use_cuda=self.use_cuda)

        # Initialize the FFT
        self.fft = FFT(Nr, Nz, use_cuda=self.use_cuda)

        # Initialize the spectral buffers
        if self.use_cuda:
            self.spect_buffer_r = cuda.device_array((Nz, Nr),
                                                    dtype=np.complex128)
            self.spect_buffer_t = cuda.device_array((Nz, Nr),
                                                    dtype=np.complex128)
        else:
            # Initialize the spectral buffers
            self.spect_buffer_r = np.zeros((Nz, Nr), dtype=np.complex128)
            self.spect_buffer_t = np.zeros((Nz, Nr), dtype=np.complex128)

        # Different names for same object (for economy of memory)
        self.spect_buffer_p = self.spect_buffer_r
        self.spect_buffer_m = self.spect_buffer_t
Example #9
    def correct_currents(self, dt, ps):
        """
        Correct the currents so that they satisfy the
        charge conservation equation

        Parameters
        ----------
        dt : float
            Timestep of the simulation

        ps : PsatdCoeffs object
            Contains the coefficients that are used in the current correction
        """
        # Precalculate useful coefficient
        inv_dt = 1. / dt

        if self.use_cuda:
            # Obtain the cuda grid
            dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr)
            # Correct the currents on the GPU
            if ps.V is None:
                # With standard PSATD algorithm
                cuda_correct_currents_standard[dim_grid, dim_block](
                    self.rho_prev, self.rho_next, self.Jp, self.Jm, self.Jz,
                    self.d_kz, self.d_kr, self.d_inv_k2, inv_dt, self.Nz,
                    self.Nr)
            else:
                # With Galilean/comoving algorithm
                cuda_correct_currents_comoving[dim_grid, dim_block](
                    self.rho_prev, self.rho_next, self.Jp, self.Jm, self.Jz,
                    self.d_kz, self.d_kr, self.d_inv_k2, ps.d_j_corr_coef,
                    ps.d_T_eb, ps.d_T_cc, inv_dt, self.Nz, self.Nr)
        else:
            # Correct the currents on the CPU
            if ps.V is None:
                # With standard PSATD algorithm
                numba_correct_currents_standard(self.rho_prev, self.rho_next,
                                                self.Jp, self.Jm, self.Jz,
                                                self.kz, self.kr, self.inv_k2,
                                                inv_dt, self.Nz, self.Nr)
            else:
                # With Galilean/comoving algorithm
                numba_correct_currents_comoving(self.rho_prev, self.rho_next,
                                                self.Jp, self.Jm, self.Jz,
                                                self.kz, self.kr, self.inv_k2,
                                                ps.j_corr_coef, ps.T_eb,
                                                ps.T_cc, inv_dt, self.Nz,
                                                self.Nr)
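
Schematically, the "standard" correction enforces the discrete continuity equation in spectral space by removing the part of J that violates it. A simplified, z-only sketch of the idea (the cylindrical version above also involves kr and the Jp/Jm components, so this is not the library's formula):

    import numpy as np

    def correct_current_1d(Jz, rho_prev, rho_next, kz, inv_dt):
        # Residual of the continuity equation: i*kz*Jz + d(rho)/dt
        F = 1.j * kz * Jz + (rho_next - rho_prev) * inv_dt
        # Shift Jz so that the residual vanishes (kz = 0 mode untouched)
        safe_kz = np.where(kz != 0, kz, 1.)
        Jz = Jz + np.where(kz != 0, 1.j * F / safe_kz, 0.)
        return Jz
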
Example #10
    def handle_scal_buffer(self,
                           grid,
                           method,
                           exchange_type,
                           use_cuda,
                           before_sending=False,
                           after_receiving=False,
                           gpudirect=False):
        """
        Scalar field buffer handling

        1) Copies data from the field grid to the MPI sending buffers
        -- or --
        2) Replaces or adds MPI sending buffers to the field grid

        For method 'replace':

        Either copy the inner part of the domain to the sending buffer
        for a scalar field, or copy the receiving buffer for a scalar field
        into the guard cells of the domain.

        For method 'add':

        Either copy the inner part and the guard region of the domain to the
        sending buffer for a scalar field, or add the receiving buffer for the
        scalar field to the guard cells and the inner region of the domain.

        Depending on whether the field data is initially on the CPU
        or on the GPU, this function will do the appropriate exchange
        with the device.

        Parameters
        ----------
        grid: list of 2darrays
            (One element per azimuthal mode)
            The 2d arrays represent the fields on the interpolation grid

        method: str
            Can either be 'replace' or 'add' depending on the type
            of field exchange that is needed

        exchange_type: str
            Key identifying which set of send/receive buffers is used
            for this exchange

        use_cuda: bool
            Whether the simulation runs on GPUs. If True,
            the buffers are copied to the GPU arrays after the MPI exchange.

        before_sending: bool
            Whether to copy the inner part of the domain to the sending buffer

        after_receiving: bool
            Whether to copy the receiving buffer to the guard cells

        gpudirect: bool
            - if `gpudirect` is True:
              Uses the CUDA GPUDirect feature on clusters
              that have a working CUDA-aware MPI implementation.
            - if `gpudirect` is False: (default)
              Standard MPI communication is performed when using CUDA
              for computation. This involves a manual GPU to CPU memory
              copy before exchanging information between MPI domains.
        """
        # Define region that is copied to or from the buffer
        # depending on the method used.
        if method == 'replace':
            nz_start = self.n_guard
            nz_end = 2 * self.n_guard
        if method == 'add':
            nz_start = 0
            nz_end = 2 * self.n_guard
        # Whether or not to send to the left or right neighbor
        copy_left = (self.left_proc is not None)
        copy_right = (self.right_proc is not None)
        Nz = grid[0].shape[0]

        # When using the GPU
        if use_cuda:
            # Calculate the number of blocks and threads per block
            dim_grid_2d, dim_block_2d = cuda_tpb_bpg_2d(
                nz_end - nz_start, self.Nr)

            if before_sending:
                # Copy the inner regions of the domain to the buffers
                for m in range(self.Nm):
                    copy_scal_to_gpu_buffer[dim_grid_2d, dim_block_2d](
                        self.d_send_l[exchange_type],
                        self.d_send_r[exchange_type], grid[m], m, copy_left,
                        copy_right, nz_start, nz_end)
                # If GPUDirect with CUDA-aware MPI is not used,
                # copy the GPU buffers to the sending CPU buffers
                if not gpudirect:
                    if copy_left:
                        self.d_send_l[exchange_type].copy_to_host(
                            self.send_l[exchange_type])
                    if copy_right:
                        self.d_send_r[exchange_type].copy_to_host(
                            self.send_r[exchange_type])

            elif after_receiving:
                # If GPUDirect with CUDA-aware MPI is not used,
                # copy the CPU receiving buffers to the GPU buffers
                if not gpudirect:
                    if copy_left:
                        self.d_recv_l[exchange_type].copy_to_device(
                            self.recv_l[exchange_type])
                    if copy_right:
                        self.d_recv_r[exchange_type].copy_to_device(
                            self.recv_r[exchange_type])
                if method == 'replace':
                    # Replace the guard cells of the domain with the buffers
                    for m in range(self.Nm):
                        replace_scal_from_gpu_buffer[
                            dim_grid_2d,
                            dim_block_2d](self.d_recv_l[exchange_type],
                                          self.d_recv_r[exchange_type],
                                          grid[m], m, copy_left, copy_right,
                                          nz_start, nz_end)
                elif method == 'add':
                    # Add the buffers to the domain
                    for m in range(self.Nm):
                        add_scal_from_gpu_buffer[dim_grid_2d, dim_block_2d](
                            self.d_recv_l[exchange_type],
                            self.d_recv_r[exchange_type], grid[m], m,
                            copy_left, copy_right, nz_start, nz_end)

        # Without GPU
        else:

            if before_sending:

                send_l = self.send_l[exchange_type]
                send_r = self.send_r[exchange_type]
                # Copy the inner regions of the domain to the buffer
                if copy_left:
                    for m in range(self.Nm):
                        send_l[m, :, :] = grid[m][nz_start:nz_end, :]
                if copy_right:
                    for m in range(self.Nm):
                        send_r[m, :, :] = grid[m][Nz - nz_end:Nz - nz_start, :]

            elif after_receiving:

                recv_l = self.recv_l[exchange_type]
                recv_r = self.recv_r[exchange_type]
                if method == 'replace':
                    # Replace the guard cells of the domain with the buffers
                    if copy_left:
                        for m in range(self.Nm):
                            grid[m][:nz_end - nz_start, :] = recv_l[m, :, :]
                    if copy_right:
                        for m in range(self.Nm):
                            grid[m][-(nz_end - nz_start):, :] = recv_r[m, :, :]

                if method == 'add':
                    # Add buffers to the domain
                    if copy_left:
                        for m in range(self.Nm):
                            grid[m][:nz_end - nz_start, :] += recv_l[m, :, :]
                    if copy_right:
                        for m in range(self.Nm):
                            grid[m][-(nz_end -
                                      nz_start):, :] += recv_r[m, :, :]
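
A hedged sketch of how this buffer handler is typically driven: pack the send buffers, perform the MPI exchange, then unpack. The driver function, the key argument and the exchange step are placeholders, not the library's actual code.

    def exchange_scalar(handler, grid, key, use_cuda):
        # 1) Pack: copy the outer region of the domain into the send buffers
        handler.handle_scal_buffer(grid, 'add', key, use_cuda,
                                   before_sending=True)
        # 2) MPI exchange of handler.send_l/send_r into handler.recv_l/recv_r
        #    (e.g. non-blocking Isend/Irecv with the left/right neighbors)
        # 3) Unpack: add the received buffers into the guard and inner cells
        handler.handle_scal_buffer(grid, 'add', key, use_cuda,
                                   after_receiving=True)
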
Example #11
    def push_eb_with(self, ps, use_true_rho=False):
        """
        Push the fields over one timestep, using the psatd coefficients.

        Parameters
        ----------
        ps : PsatdCoeffs object
            The PSATD coefficients corresponding to the same azimuthal mode m

        use_true_rho : bool, optional
            Whether to use the rho projected on the grid.
            If set to False, this will use div(E) and div(J)
            to evaluate rho and its time evolution.
            In the case use_true_rho==False, the rho projected
            on the grid is used only to correct the currents, and
            the simulation can be run without the neutralizing ions.
        """
        # Check that psatd object passed as argument is the right one
        # (i.e. corresponds to the right mode)
        assert (self.m == ps.m)

        if self.use_cuda:
            # Obtain the cuda grid
            dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr, 1, 16)
            # Push the fields on the GPU
            if ps.V is None:
                # With the standard PSATD algorithm
                if self.use_pml:
                    # Push the PML split component
                    cuda_push_eb_pml_standard[dim_grid, dim_block](
                        self.Ep_pml, self.Em_pml, self.Bp_pml, self.Bm_pml,
                        self.Ez, self.Bz, ps.d_C, ps.d_S_w, self.d_kr,
                        self.d_kz, self.Nz, self.Nr)
                # Push the regular fields
                cuda_push_eb_standard[dim_grid, dim_block](
                    self.Ep, self.Em, self.Ez, self.Bp, self.Bm, self.Bz,
                    self.Jp, self.Jm, self.Jz, self.rho_prev, self.rho_next,
                    ps.d_rho_prev_coef, ps.d_rho_next_coef, ps.d_j_coef,
                    ps.d_C, ps.d_S_w, self.d_kr, self.d_kz, ps.dt,
                    use_true_rho, self.Nz, self.Nr)
            else:
                # With the Galilean/comoving algorithm
                if self.use_pml:
                    # Push the PML split component
                    cuda_push_eb_pml_comoving[dim_grid, dim_block](
                        self.Ep_pml, self.Em_pml, self.Bp_pml, self.Bm_pml,
                        self.Ez, self.Bz, ps.d_C, ps.d_S_w, ps.d_T_eb,
                        self.d_kr, self.d_kz, self.Nz, self.Nr)
                # Push the regular fields
                cuda_push_eb_comoving[dim_grid, dim_block](
                    self.Ep, self.Em, self.Ez, self.Bp, self.Bm, self.Bz,
                    self.Jp, self.Jm, self.Jz, self.rho_prev, self.rho_next,
                    ps.d_rho_prev_coef, ps.d_rho_next_coef, ps.d_j_coef,
                    ps.d_C, ps.d_S_w, ps.d_T_eb, ps.d_T_cc, ps.d_T_rho,
                    self.d_kr, self.d_kz, ps.dt, ps.V, use_true_rho, self.Nz,
                    self.Nr)
        else:
            # Push the fields on the CPU
            if ps.V is None:
                # With the standard PSATD algorithm
                if self.use_pml:
                    # Push the PML split component
                    numba_push_eb_pml_standard(self.Ep_pml, self.Em_pml,
                                               self.Bp_pml, self.Bm_pml,
                                               self.Ez, self.Bz, ps.C, ps.S_w,
                                               self.kr, self.kz, self.Nz,
                                               self.Nr)
                # Push the regular fields
                numba_push_eb_standard(self.Ep, self.Em, self.Ez, self.Bp,
                                       self.Bm, self.Bz, self.Jp, self.Jm,
                                       self.Jz, self.rho_prev, self.rho_next,
                                       ps.rho_prev_coef, ps.rho_next_coef,
                                       ps.j_coef, ps.C, ps.S_w, self.kr,
                                       self.kz, ps.dt, use_true_rho, self.Nz,
                                       self.Nr)

            else:
                # With the Galilean/comoving algorithm
                if self.use_pml:
                    # Push the PML split component
                    numba_push_eb_pml_comoving(self.Ep_pml, self.Em_pml,
                                               self.Bp_pml, self.Bm_pml,
                                               self.Ez, self.Bz, ps.C, ps.S_w,
                                               ps.T_eb, self.kr, self.kz,
                                               self.Nz, self.Nr)
                # Push the regular fields
                numba_push_eb_comoving(
                    self.Ep, self.Em, self.Ez, self.Bp, self.Bm, self.Bz,
                    self.Jp, self.Jm, self.Jz, self.rho_prev, self.rho_next,
                    ps.rho_prev_coef, ps.rho_next_coef, ps.j_coef, ps.C,
                    ps.S_w, ps.T_eb, ps.T_cc, ps.T_rho, self.kr, self.kz,
                    ps.dt, ps.V, use_true_rho, self.Nz, self.Nr)
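
A hedged sketch of how these spectral methods could be combined over one PIC step, one azimuthal mode at a time (the real loop ordering, and the exact correct_currents signature, vary between the examples in this listing):

    def advance_spectral_fields(spect, psatd, dt):
        # spect: list of spectral-grid objects (one per azimuthal mode)
        # psatd: list of matching PsatdCoeffs objects
        for m in range(len(spect)):
            spect[m].correct_currents(dt, psatd[m])
            spect[m].push_eb_with(psatd[m], use_true_rho=False)
            spect[m].push_rho()
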
Example #12
    def __init__(self, Nr, Nz, use_cuda=False, nthreads=None):
        """
        Initialize an FFT object

        Parameters
        ----------
        Nr: int
           Number of grid points along the r axis (axis -1)

        Nz: int
           Number of grid points along the z axis (axis 0)

        use_cuda: bool, optional
           Whether to perform the Fourier transform on the GPU

        nthreads : int, optional
            Number of threads for the FFTW transform.
            If None, the default number of threads of numba is used
            (environment variable NUMBA_NUM_THREADS)
        """
        # Check whether to use cuda
        self.use_cuda = use_cuda
        if (self.use_cuda is True) and (cuda_installed is False):
            self.use_cuda = False
            print('** Cuda not available for Fourier transform.')
            print('** Performing the Fourier transform on the CPU.')

        # Check whether to use MKL
        self.use_mkl = mkl_installed

        # Initialize the object for calculation on the GPU
        if self.use_cuda:
            # Set optimal number of CUDA threads per block
            # for copy 1d/2d kernels (determined empirically)
            copy_tpb = (8, 32) if cuda_gpu_model == "V100" else (2, 16)
            # Initialize the dimension of the grid and blocks
            self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr, *copy_tpb)
            # Initialize 1d buffer for cufft
            self.buffer1d_in = cupy.empty((Nz * Nr, ), dtype=np.complex128)
            self.buffer1d_out = cupy.empty((Nz * Nr, ), dtype=np.complex128)
            # Initialize the CUDA FFT plan object
            self.fft = cufft.Plan1d(Nz, cufft.CUFFT_Z2Z, Nr)
            self.inv_Nz = 1. / Nz  # For normalization of the iFFT

        # Initialize the object for calculation on the CPU
        else:
            # For MKL FFT
            if self.use_mkl:
                # Initialize the MKL plan with dummy array
                spect_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
                self.mklfft = MKLFFT(spect_buffer)

            # For FFTW
            else:
                # Determine number of threads
                if nthreads is None:
                    # Get the default number of threads for numba
                    nthreads = numba.config.NUMBA_NUM_THREADS
                # Initialize the FFT plan with dummy arrays
                interp_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
                spect_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
                self.fft = pyfftw.FFTW(interp_buffer,
                                       spect_buffer,
                                       axes=(0, ),
                                       direction='FFTW_FORWARD',
                                       threads=nthreads)
                self.ifft = pyfftw.FFTW(spect_buffer,
                                        interp_buffer,
                                        axes=(0, ),
                                        direction='FFTW_BACKWARD',
                                        threads=nthreads)
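
For reference, the plans built here transform along the z axis only (axis 0). In plain NumPy terms, the forward and (normalized) inverse transforms compute the following; this is a reference check, not the code path used at runtime:

    import numpy as np

    interp = np.random.rand(8, 4) + 1.j * np.random.rand(8, 4)
    spect = np.fft.fft(interp, axis=0)      # forward transform along z
    back = np.fft.ifft(spect, axis=0)       # normalized inverse along z
    assert np.allclose(back, interp)
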
Example #13
    def __init__(self, p, m, Nr, Nz, rmax, use_cuda=False ):
        """
        Calculate the r (position) and nu (frequency) grid
        on which the transform will operate.

        Also store auxiliary data needed for the transform.

        Parameters
        ----------
        p: int
            Order of the Hankel transform

        m: int
            The azimuthal mode for which the Hankel transform is calculated

        Nr, Nz: int
            Number of points in the r direction and z direction

        rmax: float
            Edge of the box in which the Hankel transform is taken
            (The function is assumed to be zero at that point.)

        use_cuda: bool, optional
            Whether to use the GPU for the Hankel transform
        """
        # Register whether to use the GPU.
        # If yes, initialize the corresponding cuda object
        self.use_cuda = use_cuda
        if (self.use_cuda==True) and (cuda_installed==False):
            self.use_cuda = False
            print('** Cuda not available for Hankel transform.')
            print('** Performing the Hankel transform on the CPU.')

        # Check that m has a valid value
        if m not in [p-1, p, p+1]:
            raise ValueError('m must be either p-1, p or p+1')

        # Register values of the arguments
        self.p = p
        self.m = m
        self.Nr = Nr
        self.rmax = rmax
        self.Nz = Nz

        # Calculate the zeros of the Bessel function
        if m !=0:
            # In this case, 0 is a zero of the Bessel function of order m.
            # It turns out that it is needed to reconstruct the signal for p=0.
            alphas = np.hstack( (np.array([0.]), jn_zeros(m, Nr-1)) )
        else:
            alphas = jn_zeros(m, Nr)

        # Calculate the spectral grid
        self.nu = 1./(2*np.pi*rmax) * alphas

        # Calculate the spatial grid (uniform grid with a half-cell offset)
        self.r = (rmax*1./Nr) * ( np.arange(Nr) + 0.5 )

        # Calculate and store the inverse matrix invM
        # (imposed by the constraints on the DHT of Bessel modes)
        # NB: When compared with the FBPIC article, all the matrices here
        # are calculated in transposed form. This is done so as to use the
        # `dot` and `gemm` functions, in the `transform` method.
        self.invM = np.empty((Nr, Nr))
        if p == m:
            p_denom = p+1
        else:
            p_denom = p
        denom = np.pi * rmax**2 * jn( p_denom, alphas)**2
        num = jn( p, 2*np.pi* self.r[np.newaxis,:]*self.nu[:,np.newaxis] )
        # Get the inverse matrix
        if m!=0:
            self.invM[1:, :] = num[1:, :] / denom[1:, np.newaxis]
            # In this case, the functions are represented by Bessel functions
            # *and* an additional mode (below) which satisfies the same
            # algebraic relations for curl/div/grad as the regular Bessel modes,
            # with the value kperp=0.
            # The normalization of this mode is arbitrary, and is chosen
            # so that the condition number of invM is close to 1
            if p==m-1:
                self.invM[0, :] = self.r**(m-1) * 1./( np.pi * rmax**(m+1) )
            else:
                self.invM[0, :] = 0.
        else :
            self.invM[:, :] = num[:, :] / denom[:, np.newaxis]

        # Calculate the matrix M by inverting invM
        self.M = np.empty((Nr, Nr))
        if m !=0 and p != m-1:
            self.M[:, 1:] = np.linalg.pinv( self.invM[1:,:] )
            self.M[:, 0] = 0.
        else:
            self.M = np.linalg.inv( self.invM )

        # Copy the matrices to the GPU if needed
        if self.use_cuda:
            self.d_M = cupy.asarray( self.M )
            self.d_invM = cupy.asarray( self.invM )

        # Initialize buffer arrays to store the complex Nz x Nr grid
        # as a real 2Nz x Nr grid, before performing the matrix product
        # (This is because a matrix product of reals is faster than a matrix
        # product of complexs, and the real-complex conversion is negligible.)
        if not self.use_cuda:
            # Initialize real buffer arrays on the CPU
            zero_array = np.zeros((2*Nz, Nr), dtype=np.float64)
            self.array_in = zero_array.copy()
            self.array_out = zero_array.copy()
        else:
            # Initialize real buffer arrays on the GPU
            zero_array = np.zeros((2*Nz, Nr), dtype=np.float64)
            self.d_in = cupy.asarray( zero_array )
            self.d_out = cupy.asarray( zero_array )
            # Initialize cuBLAS
            self.blas = device.get_cublas_handle()
            # Set optimal number of CUDA threads per block
            # for copy 2d real/complex (determined empirically)
            copy_tpb = (8,32) if cuda_gpu_model == "V100" else (2,16)
            # Initialize the threads per block and block per grid
            self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr, *copy_tpb)
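
Given the transposed-matrix convention noted in the comments above, applying the transform to a (Nz, Nr) grid reduces to a matrix product along the radial axis. A minimal sketch, assuming M maps the spatial grid to the spectral grid and invM maps it back (function names are illustrative):

    import numpy as np

    def dht_forward(F_spatial, M):
        # (Nz, Nr) . (Nr, Nr): transform every z-slice along r
        return np.dot(F_spatial, M)

    def dht_inverse(F_spectral, invM):
        return np.dot(F_spectral, invM)
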
Example #14
    def filter(self, fieldtype):
        """
        Filter the field `fieldtype`

        Parameter
        ---------
        fieldtype : string
            A string which represents the kind of field to be filtered
            (either 'E', 'B', 'J', 'rho_next' or 'rho_prev')
        """
        if self.use_cuda:
            # Obtain the cuda grid
            dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr)
            # Filter fields on the GPU
            if fieldtype == 'rho_prev':
                cuda_filter_scalar[dim_grid, dim_block](self.rho_prev,
                                                        self.d_filter_array,
                                                        self.Nz, self.Nr)
            elif fieldtype == 'rho_next':
                cuda_filter_scalar[dim_grid, dim_block](self.rho_next,
                                                        self.d_filter_array,
                                                        self.Nz, self.Nr)
            elif fieldtype == 'J':
                cuda_filter_vector[dim_grid,
                                   dim_block](self.Jp, self.Jm, self.Jz,
                                              self.d_filter_array, self.Nz,
                                              self.Nr)
            elif fieldtype == 'E':
                cuda_filter_vector[dim_grid,
                                   dim_block](self.Ep, self.Em, self.Ez,
                                              self.d_filter_array, self.Nz,
                                              self.Nr)
            elif fieldtype == 'B':
                cuda_filter_vector[dim_grid,
                                   dim_block](self.Bp, self.Bm, self.Bz,
                                              self.d_filter_array, self.Nz,
                                              self.Nr)
            else:
                raise ValueError('Invalid string for fieldtype: %s' %
                                 fieldtype)
        else:
            # Filter fields on the CPU

            if fieldtype == 'rho_prev':
                self.rho_prev = self.rho_prev * self.filter_array
            elif fieldtype == 'rho_next':
                self.rho_next = self.rho_next * self.filter_array
            elif fieldtype == 'J':
                self.Jp = self.Jp * self.filter_array
                self.Jm = self.Jm * self.filter_array
                self.Jz = self.Jz * self.filter_array
            elif fieldtype == 'E':
                self.Ep = self.Ep * self.filter_array
                self.Em = self.Em * self.filter_array
                self.Ez = self.Ez * self.filter_array
            elif fieldtype == 'B':
                self.Bp = self.Bp * self.filter_array
                self.Bm = self.Bm * self.filter_array
                self.Bz = self.Bz * self.filter_array
            else:
                raise ValueError('Invalid string for fieldtype: %s' %
                                 fieldtype)
Example #15
    def handle_scal_buffer(self,
                           grid,
                           method,
                           use_cuda,
                           before_sending=False,
                           after_receiving=False):
        """
        Scalar field buffer handling

        1) Copies data from the field grid to the MPI sending buffers
        -- or --
        2) Replaces or adds MPI sending buffers to the field grid

        For method 'replace':

        Either copy the inner part of the domain to the sending buffer
        for a scalar field, or copy the receiving buffer for a scalar field
        into the guard cells of the domain.

        For method 'add':

        Either copy the inner part and the guard region of the domain to the
        sending buffer for a scalar field, or add the receiving buffer for the
        scalar field to the guard cells and the inner region of the domain.

        Depending on whether the field data is initially on the CPU
        or on the GPU, this function will do the appropriate exchange
        with the device.

        Parameters
        ----------
        grid: list of 2darrays
            (One element per azimuthal mode)
            The 2d arrays represent the fields on the interpolation grid

        method: str
            Can either be 'replace' or 'add' depending on the type
            of field exchange that is needed

        use_cuda: bool
            Whether the simulation runs on GPUs. If True,
            the buffers are copied to the GPU arrays after the MPI exchange.

        before_sending: bool
            Whether to copy the inner part of the domain to the sending buffer

        after_receiving: bool
            Whether to copy the receiving buffer to the guard cells
        """
        if method == 'replace':
            nz_start = self.n_guard
            nz_end = 2 * self.n_guard
        if method == 'add':
            nz_start = 0
            nz_end = 2 * self.n_guard

        copy_left = (self.left_proc is not None)
        copy_right = (self.right_proc is not None)
        Nz = grid[0].shape[0]

        # When using the GPU
        if use_cuda:
            # Calculate the number of blocks and threads per block
            dim_grid_2d, dim_block_2d = cuda_tpb_bpg_2d(
                nz_end - nz_start, self.Nr)

            if before_sending:
                if method == 'replace':
                    # Copy the inner regions of the domain to the GPU buffers
                    for m in range(self.Nm):
                        copy_scal_to_gpu_buffer[dim_grid_2d, dim_block_2d](
                            self.d_scal_rep_buffer_l, self.d_scal_rep_buffer_r,
                            grid[m], m, copy_left, copy_right, nz_start,
                            nz_end)
                    # Copy the GPU buffers to the sending CPU buffers
                    if copy_left:
                        self.d_scal_rep_buffer_l.copy_to_host(
                            self.scal_rep_send_l)
                    if copy_right:
                        self.d_scal_rep_buffer_r.copy_to_host(
                            self.scal_rep_send_r)

                if method == 'add':
                    # Copy the inner+guard regions of the domain to the buffers
                    for m in range(self.Nm):
                        copy_scal_to_gpu_buffer[dim_grid_2d, dim_block_2d](
                            self.d_scal_add_buffer_l, self.d_scal_add_buffer_r,
                            grid[m], m, copy_left, copy_right, nz_start,
                            nz_end)
                    # Copy the GPU buffers to the sending CPU buffers
                    if copy_left:
                        self.d_scal_add_buffer_l.copy_to_host(
                            self.scal_add_send_l)
                    if copy_right:
                        self.d_scal_add_buffer_r.copy_to_host(
                            self.scal_add_send_r)

            elif after_receiving:
                if method == 'replace':
                    # Copy the CPU receiving buffers to the GPU buffers
                    if copy_left:
                        self.d_scal_rep_buffer_l.copy_to_device(
                            self.scal_rep_recv_l)
                    if copy_right:
                        self.d_scal_rep_buffer_r.copy_to_device(
                            self.scal_rep_recv_r)
                    # Replace the guard cells of the domain with the buffers
                    for m in range(self.Nm):
                        replace_scal_from_gpu_buffer[
                            dim_grid_2d,
                            dim_block_2d](self.d_scal_rep_buffer_l,
                                          self.d_scal_rep_buffer_r, grid[m], m,
                                          copy_left, copy_right, nz_start,
                                          nz_end)

                if method == 'add':
                    # Copy the CPU receiving buffers to the GPU buffers
                    if copy_left:
                        self.d_scal_add_buffer_l.copy_to_device(
                            self.scal_add_recv_l)
                    if copy_right:
                        self.d_scal_add_buffer_r.copy_to_device(
                            self.scal_add_recv_r)
                    # Add the GPU buffers to the domain
                    for m in range(self.Nm):
                        add_scal_from_gpu_buffer[dim_grid_2d, dim_block_2d](
                            self.d_scal_add_buffer_l, self.d_scal_add_buffer_r,
                            grid[m], m, copy_left, copy_right, nz_start,
                            nz_end)
        # Without GPU
        else:
            if before_sending:
                if method == 'replace':
                    # Copy the inner regions of the domain to the buffer
                    if copy_left:
                        for m in range(self.Nm):
                            self.scal_rep_send_l[m, :, :] = grid[m][
                                nz_start:nz_end, :]
                    if copy_right:
                        for m in range(self.Nm):
                            self.scal_rep_send_r[m, :, :] = grid[m][
                                Nz - nz_end:Nz - nz_start, :]

                if method == 'add':
                    # Copy the inner+guard regions of the domain to the buffer
                    if copy_left:
                        for m in range(self.Nm):
                            self.scal_add_send_l[m, :, :] = grid[m][
                                nz_start:nz_end, :]
                    if copy_right:
                        for m in range(self.Nm):
                            self.scal_add_send_r[m, :, :] = grid[m][
                                Nz - nz_end:Nz - nz_start, :]

            elif after_receiving:
                if method == 'replace':
                    # Replace the guard cells of the domain with the buffers
                    if copy_left:
                        for m in range(self.Nm):
                            grid[m][:nz_end -
                                    nz_start, :] = self.scal_rep_recv_l[
                                        m, :, :]
                    if copy_right:
                        for m in range(self.Nm):
                            grid[m][-(nz_end - nz_start
                                      ):, :] = self.scal_rep_recv_r[m, :, :]

                if method == 'add':
                    # Add buffers to the domain
                    if copy_left:
                        for m in range(self.Nm):
                            grid[m][:nz_end -
                                    nz_start, :] += self.scal_add_recv_l[
                                        m, :, :]
                    if copy_right:
                        for m in range(self.Nm):
                            grid[m][-(nz_end - nz_start
                                      ):, :] += self.scal_add_recv_r[m, :, :]
Example #16
    def __init__(self, Nr, Nz, use_cuda=False, nthreads=None):
        """
        Initialize an FFT object

        Parameters
        ----------
        Nr: int
           Number of grid points along the r axis (axis -1)

        Nz: int
           Number of grid points along the z axis (axis 0)

        use_cuda: bool, optional
           Whether to perform the Fourier transform on the GPU

        nthreads : int, optional
            Number of threads for the FFTW transform.
            If None, the default number of threads of numba is used
            (environment variable NUMBA_NUM_THREADS)
        """
        # Check whether to use cuda
        self.use_cuda = use_cuda
        if (self.use_cuda is True) and (cuda_installed is False):
            self.use_cuda = False
            print('** Cuda not available for Fourier transform.')
            print('** Performing the Fourier transform on the CPU.')

        # Check whether to use MKL
        self.use_mkl = mkl_installed

        # Initialize the object for calculation on the GPU
        if self.use_cuda:
            # Initialize the dimension of the grid and blocks
            self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr)

            # Initialize 1d buffer for cufft
            self.buffer1d_in = cuda.device_array((Nz * Nr, ),
                                                 dtype=np.complex128)
            self.buffer1d_out = cuda.device_array((Nz * Nr, ),
                                                  dtype=np.complex128)
            # Initialize the cuda libraries object
            self.fft = cufft.FFTPlan(shape=(Nz, ),
                                     itype=np.complex128,
                                     otype=np.complex128,
                                     batch=Nr)
            self.blas = cublas.Blas()  # For normalization of the iFFT
            self.inv_Nz = 1. / Nz  # For normalization of the iFFT

        # Initialize the object for calculation on the CPU
        else:

            # For MKL FFT
            if self.use_mkl:
                # Initialize the MKL plan with dummy array
                spect_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
                self.mklfft = MKLFFT(spect_buffer)

            # For FFTW
            else:
                # Determine number of threads
                if nthreads is None:
                    # Get the default number of threads for numba
                    nthreads = numba.config.NUMBA_NUM_THREADS
                # Initialize the FFT plan with dummy arrays
                interp_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
                spect_buffer = np.zeros((Nz, Nr), dtype=np.complex128)
                self.fft = pyfftw.FFTW(interp_buffer,
                                       spect_buffer,
                                       axes=(0, ),
                                       direction='FFTW_FORWARD',
                                       threads=nthreads)
                self.ifft = pyfftw.FFTW(spect_buffer,
                                        interp_buffer,
                                        axes=(0, ),
                                        direction='FFTW_BACKWARD',
                                        threads=nthreads)
Example #17
    def correct_currents(self, dt, ps, current_correction):
        """
        Correct the currents so that they satisfy the
        charge conservation equation

        Parameters
        ----------
        dt: float
            Timestep of the simulation

        ps: a PsatdCoeffs object
            Contains coefficients that are used in the current correction

        current_correction: string
            The type of current correction performed
        """
        # Precalculate useful coefficient
        inv_dt = 1. / dt

        if self.use_cuda:
            # Obtain the cuda grid
            dim_grid, dim_block = cuda_tpb_bpg_2d(self.Nz, self.Nr, 1, 16)
            # Correct the currents on the GPU
            if ps.V is None:
                # With standard PSATD algorithm
                # Method: curl-free
                if current_correction == 'curl-free':
                    cuda_correct_currents_curlfree_standard \
                        [dim_grid, dim_block](
                            self.rho_prev, self.rho_next,
                            self.Jp, self.Jm, self.Jz,
                            self.d_kz, self.d_kr, self.d_inv_k2,
                            inv_dt, self.Nz, self.Nr )
                # Method: cross-deposition
                elif current_correction == 'cross-deposition':
                    cuda_correct_currents_crossdeposition_standard \
                        [dim_grid, dim_block](
                            self.rho_prev, self.rho_next,
                            self.rho_next_z, self.rho_next_xy,
                            self.Jp, self.Jm, self.Jz,
                            self.d_kz, self.d_kr, inv_dt, self.Nz, self.Nr)
            else:
                # With Galilean/comoving algorithm
                # Method: curl-free
                if current_correction == 'curl-free':
                    cuda_correct_currents_curlfree_comoving \
                        [dim_grid, dim_block](
                            self.rho_prev, self.rho_next,
                            self.Jp, self.Jm, self.Jz,
                            self.d_kz, self.d_kr, self.d_inv_k2,
                            ps.d_j_corr_coef, ps.d_T_eb, ps.d_T_cc,
                            inv_dt, self.Nz, self.Nr)
                # Method: cross-deposition
                elif current_correction == 'cross-deposition':
                    cuda_correct_currents_crossdeposition_comoving \
                        [dim_grid, dim_block](
                            self.rho_prev, self.rho_next,
                            self.rho_next_z, self.rho_next_xy,
                            self.Jp, self.Jm, self.Jz,
                            self.d_kz, self.d_kr,
                            ps.d_j_corr_coef, ps.d_T_eb, ps.d_T_cc,
                            inv_dt, self.Nz, self.Nr)
        else:
            # Correct the currents on the CPU
            if ps.V is None:
                # With standard PSATD algorithm
                # Method: curl-free
                if current_correction == 'curl-free':
                    numba_correct_currents_curlfree_standard(
                        self.rho_prev, self.rho_next, self.Jp, self.Jm,
                        self.Jz, self.kz, self.kr, self.inv_k2, inv_dt,
                        self.Nz, self.Nr)
                # Method: cross-deposition
                elif current_correction == 'cross-deposition':
                    numba_correct_currents_crossdeposition_standard(
                        self.rho_prev, self.rho_next, self.rho_next_z,
                        self.rho_next_xy, self.Jp, self.Jm, self.Jz, self.kz,
                        self.kr, inv_dt, self.Nz, self.Nr)
            else:
                # With Galilean/comoving algorithm
                # Method: curl-free
                if current_correction == 'curl-free':
                    numba_correct_currents_curlfree_comoving(
                        self.rho_prev, self.rho_next, self.Jp, self.Jm,
                        self.Jz, self.kz, self.kr, self.inv_k2, ps.j_corr_coef,
                        ps.T_eb, ps.T_cc, inv_dt, self.Nz, self.Nr)
                # Method: cross-deposition
                elif current_correction == 'cross-deposition':
                    numba_correct_currents_crossdeposition_comoving(
                        self.rho_prev, self.rho_next, self.rho_next_z,
                        self.rho_next_xy, self.Jp, self.Jm, self.Jz, self.kz,
                        self.kr, ps.j_corr_coef, ps.T_eb, ps.T_cc, inv_dt,
                        self.Nz, self.Nr)