示例#1
0
    def __init__(self, \
                 gpu_ctx, \
                 H, eta0, hu0, hv0, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, f, r, A=0.0, \
                 t=0.0, \
                 coriolis_beta=0.0, \
                 y_zero_reference_cell = 0, \
                 wind_stress=WindStress.WindStress(), \
                 boundary_conditions=Common.BoundaryConditions(), \
                 write_netcdf=False, \
                 ignore_ghostcells=False, \
                 offset_x=0, offset_y=0, \
                 block_width=16, block_height=16):
        """
        Initialization routine
        H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        A: Eddy viscosity coefficient (O(dx))
        t: Start simulation at time t
        coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
        y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell .
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        write_netcdf: Write the results after each superstep to a netCDF file
        """

        # Sort out internally represented ghost_cells in the presence of given
        # boundary conditions
        halo_x = 1
        halo_y = 1
        ghost_cells_x = 1
        ghost_cells_y = 1
        y_zero_reference_cell = y_zero_reference_cell + 1

        self.boundary_conditions = boundary_conditions
        if boundary_conditions.isSponge():
            nx = nx + boundary_conditions.spongeCells[
                1] + boundary_conditions.spongeCells[3] - 2 * ghost_cells_x
            ny = ny + boundary_conditions.spongeCells[
                0] + boundary_conditions.spongeCells[2] - 2 * ghost_cells_y
            y_zero_reference_cell = y_zero_reference_cell + boundary_conditions.spongeCells[
                2]

        # self.<parameters> are sat in parent constructor:
        rk_order = None
        theta = None
        super(CTCS, self).__init__(gpu_ctx, \
                                   nx, ny, \
                                   ghost_cells_x, \
                                   ghost_cells_y, \
                                   dx, dy, dt, \
                                   g, f, r, A, \
                                   t, \
                                   theta, rk_order, \
                                   coriolis_beta, \
                                   y_zero_reference_cell, \
                                   wind_stress, \
                                   write_netcdf, \
                                   ignore_ghostcells, \
                                   offset_x, offset_y, \
                                   block_width, block_height)

        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([-1, -1, 1, 1])
        self._set_interior_domain_from_sponge_cells()

        #Get kernels
        self.u_kernel = gpu_ctx.get_kernel("CTCS_U_kernel.cu",
                                           defines={
                                               'block_width': block_width,
                                               'block_height': block_height
                                           })
        self.v_kernel = gpu_ctx.get_kernel("CTCS_V_kernel.cu",
                                           defines={
                                               'block_width': block_width,
                                               'block_height': block_height
                                           })
        self.eta_kernel = gpu_ctx.get_kernel("CTCS_eta_kernel.cu",
                                             defines={
                                                 'block_width': block_width,
                                                 'block_height': block_height
                                             })

        # Get CUDA functions
        self.computeUKernel = self.u_kernel.get_function("computeUKernel")
        self.computeVKernel = self.v_kernel.get_function("computeVKernel")
        self.computeEtaKernel = self.eta_kernel.get_function(
            "computeEtaKernel")

        # Prepare kernel lauches
        self.computeUKernel.prepare("iiiifffffffffPiPiPiPiPif")
        self.computeVKernel.prepare("iiiifffffffffPiPiPiPiPif")
        self.computeEtaKernel.prepare("iiffffffffPiPiPi")

        # Set up textures
        self.update_wind_stress(self.u_kernel, self.computeUKernel)
        self.update_wind_stress(self.v_kernel, self.computeVKernel)

        #Create data by uploading to device
        self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny, halo_x, halo_y, H)
        self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny, halo_x,
                                               halo_y, eta0, hu0, hv0)

        # Global size needs to be larger than the default from parent.__init__
        self.global_size = ( \
                       int(np.ceil((self.nx+2*halo_x) / float(self.local_size[0]))), \
                       int(np.ceil((self.ny+2*halo_y) / float(self.local_size[1]))) \
                      )

        self.bc_kernel = CTCS_boundary_condition(gpu_ctx, \
                                                 self.nx, \
                                                 self.ny, \
                                                 self.boundary_conditions, \
                                                 halo_x, halo_y \
        )

        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(self, ignore_ghostcells=self.ignore_ghostcells, \
                                    staggered_grid=True, offset_x=self.offset_x, offset_y=self.offset_y)
示例#2
0
文件: FBL.py 项目: kaihc/gpu-ocean
    def __init__(self, \
                 gpu_ctx, \
                 H, eta0, hu0, hv0, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, f, r, \
                 t=0.0, \
                 coriolis_beta=0.0, \
                 y_zero_reference_cell = 0, \
                 wind_stress=WindStress.WindStress(), \
                 boundary_conditions=Common.BoundaryConditions(), \
                 write_netcdf=False, \
                 ignore_ghostcells=False, \
                 offset_x=0, offset_y=0, \
                 block_width=16, block_height=16):
        """
        Initialization routine
        H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        coriolis_beta: Coriolis linear factor -> f = f + beta*y
        y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell .
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        write_netcdf: Write the results after each superstep to a netCDF file
        """

        #Create data by uploading to device
        ghost_cells_x = 0
        ghost_cells_y = 0
        y_zero_reference_cell = y_zero_reference_cell
        self.asym_ghost_cells = [0, 0, 0, 0]  # [N, E, S, W]

        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([None, None, 0, 0])

        self.boundary_conditions = boundary_conditions
        # Add asym ghost cell if periodic boundary condition:
        if (self.boundary_conditions.north == 2) or \
           (self.boundary_conditions.south == 2):
            self.asym_ghost_cells[0] = 1
            self.interior_domain_indices[0] = -1
        if (self.boundary_conditions.east == 2) or \
           (self.boundary_conditions.west == 2):
            self.asym_ghost_cells[1] = 1
            self.interior_domain_indices[1] = -1

        if boundary_conditions.isSponge():
            nx = nx + boundary_conditions.spongeCells[
                1] + boundary_conditions.spongeCells[
                    3]  # - self.asym_ghost_cells[1] - self.asym_ghost_cells[3]
            ny = ny + boundary_conditions.spongeCells[
                0] + boundary_conditions.spongeCells[
                    2]  # - self.asym_ghost_cells[0] - self.asym_ghost_cells[2]
            y_zero_reference_cell = y_zero_reference_cell + boundary_conditions.spongeCells[
                2]

        rk_order = None
        theta = None
        A = None
        super(FBL, self).__init__(gpu_ctx, \
                                  nx, ny, \
                                  ghost_cells_x, \
                                  ghost_cells_y, \
                                  dx, dy, dt, \
                                  g, f, r, A, \
                                  t, \
                                  theta, rk_order, \
                                  coriolis_beta, \
                                  y_zero_reference_cell, \
                                  wind_stress, \
                                  write_netcdf, \
                                  ignore_ghostcells, \
                                  offset_x, offset_y, \
                                  block_width, block_height)
        self._set_interior_domain_from_sponge_cells()

        #Get kernels
        self.u_kernel = gpu_ctx.get_kernel("FBL_U_kernel.cu",
                                           defines={
                                               'block_width': block_width,
                                               'block_height': block_height
                                           })
        self.v_kernel = gpu_ctx.get_kernel("FBL_V_kernel.cu",
                                           defines={
                                               'block_width': block_width,
                                               'block_height': block_height
                                           })
        self.eta_kernel = gpu_ctx.get_kernel("FBL_eta_kernel.cu",
                                             defines={
                                                 'block_width': block_width,
                                                 'block_height': block_height
                                             })

        # Get CUDA functions
        self.computeUKernel = self.u_kernel.get_function("computeUKernel")
        self.computeVKernel = self.v_kernel.get_function("computeVKernel")
        self.computeEtaKernel = self.eta_kernel.get_function(
            "computeEtaKernel")

        # Prepare kernel lauches
        self.computeUKernel.prepare("iiffffffffPiPiPiPif")
        self.computeVKernel.prepare("iiffffffffPiPiPiPif")
        self.computeEtaKernel.prepare("iiffffffffPiPiPiPi")

        # Set up textures
        self.update_wind_stress(self.u_kernel, self.computeUKernel)
        self.update_wind_stress(self.v_kernel, self.computeVKernel)

        self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x,
                                    ghost_cells_y, H, self.asym_ghost_cells)
        self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny,
                                               ghost_cells_x, ghost_cells_y,
                                               eta0, hu0, hv0,
                                               self.asym_ghost_cells)

        # Overwrite halo with asymetric ghost cells
        self.nx_halo = np.int32(nx + self.asym_ghost_cells[1] +
                                self.asym_ghost_cells[3])
        self.ny_halo = np.int32(ny + self.asym_ghost_cells[0] +
                                self.asym_ghost_cells[2])

        self.bc_kernel = FBL_periodic_boundary(self.gpu_ctx, \
                                               self.nx, \
                                               self.ny, \
                                               self.boundary_conditions, \
                                               self.asym_ghost_cells
        )

        self.totalNumIterations = 0
        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(self, ignore_ghostcells=self.ignore_ghostcells, \
                                    staggered_grid=True, offset_x=self.offset_x, offset_y=self.offset_y)
示例#3
0
    def __init__(self, \
                 gpu_ctx, \
                 H, eta0, hu0, hv0, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, f, r, \
                 t=0.0, \
                 coriolis_beta=0.0, \
                 y_zero_reference_cell = 1, \
                 wind_stress=WindStress.WindStress(), \
                 boundary_conditions=Common.BoundaryConditions(), \
                 write_netcdf=False, \
                 comm=None, \
                 ignore_ghostcells=False, \
                 offset_x=0, offset_y=0, \
                 block_width=16, block_height=16):
        """
        Initialization routine
        H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+3) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        coriolis_beta: Coriolis linear factor -> f = f + beta*y
        y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell .
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        write_netcdf: Write the results after each superstep to a netCDF file
        comm: MPI communicator
        """
        
        #### THIS ALLOWS MAKES IT POSSIBLE TO GIVE THE OLD INPUT SHAPES TO NEW GHOST CELL REGIME: Only valid for benchmarking!
        if (eta0.shape == (ny, nx)):
            new_eta = np.zeros((ny+2, nx+2),  dtype=np.float32)
            new_eta[:ny, :nx] = eta0.copy()
            eta0 = new_eta.copy()
        if (H.shape == (ny, nx)):
            new_H = np.ones((ny+2, nx+2),  dtype=np.float32)*np.max(H)
            new_H[:ny,:nx] = H.copy()
            H = new_H.copy()
        if (hu0.shape == (ny, nx+1)):
            new_hu = np.zeros((ny+2, nx+1),  dtype=np.float32)
            new_hu[:ny, :nx+1] = hu0.copy()
            hu0 = new_hu.copy()
        if (hv0.shape == (ny+1, nx)):
            new_hv = np.zeros((ny+3, nx+2),  dtype=np.float32)
            new_hv[:ny+1,:nx] = hv0.copy()
            hv0 = new_hv.copy()
        
            
         
        
        
        #Create data by uploading to device
        ghost_cells_x = 1
        ghost_cells_y = 1
        y_zero_reference_cell = y_zero_reference_cell
        
        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([-1, -1, 1, 1])
        
        self.boundary_conditions = boundary_conditions

        if boundary_conditions.isSponge():
            nx = nx - 2 + boundary_conditions.spongeCells[1] + boundary_conditions.spongeCells[3]
            ny = ny - 2 + boundary_conditions.spongeCells[0] + boundary_conditions.spongeCells[2]
            y_zero_reference_cell = y_zero_reference_cell + boundary_conditions.spongeCells[2]
          
        rk_order = None
        theta = None
        A = None
        super(FBL, self).__init__(gpu_ctx, \
                                  nx, ny, \
                                  ghost_cells_x, \
                                  ghost_cells_y, \
                                  dx, dy, dt, \
                                  g, f, r, A, \
                                  t, \
                                  theta, rk_order, \
                                  coriolis_beta, \
                                  y_zero_reference_cell, \
                                  wind_stress, \
                                  write_netcdf, \
                                  ignore_ghostcells, \
                                  offset_x, offset_y, \
                                  comm, \
                                  block_width, block_height)
        self._set_interior_domain_from_sponge_cells()
        
        
        #Get kernels
        self.step_kernel = gpu_ctx.get_kernel("FBL_step_kernel.cu", 
                defines={'block_width': block_width, 'block_height': block_height},
                compile_args={
                    'no_extern_c': True,
                    'options': ["--use_fast_math"],
                    #'options': ["--generate-line-info"], 
                    #'options': ["--maxrregcount=32"]
                    #'arch': "compute_50", 
                    #'code': "sm_50"
                },
                jit_compile_args={
                    #jit_options=[(cuda.jit_option.MAX_REGISTERS, 39)]
                }
        )
         
        # Get CUDA functions 
        self.fblStepKernel = self.step_kernel.get_function("fblStepKernel")
        
        # Prepare kernel lauches
        self.fblStepKernel.prepare("iiffffffffPiPiPiPiif")
        
        # Set up textures
        self.update_wind_stress(self.step_kernel, self.fblStepKernel)
        
        self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, H)
        self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, eta0, hu0, hv0, fbl=True)
        
        # Domain including ghost cells
        self.nx_halo = np.int32(nx + 2)
        self.ny_halo = np.int32(ny + 2)
       
        self.bc_kernel = FBL_boundary_conditions(self.gpu_ctx, \
                                                 self.nx, \
                                                 self.ny, \
                                                 self.boundary_conditions
        )
        
        # Bit-wise boolean for wall boundary conditions
        self.wall_bc = np.int32(0)
        if (self.boundary_conditions.north == 1):
            self.wall_bc = self.wall_bc | 0x01
        if (self.boundary_conditions.east == 1):
            self.wall_bc = self.wall_bc | 0x02
        if (self.boundary_conditions.south == 1):
            self.wall_bc = self.wall_bc | 0x04
        if (self.boundary_conditions.west == 1):
            self.wall_bc = self.wall_bc | 0x08

        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(self, ignore_ghostcells=self.ignore_ghostcells, \
                                    staggered_grid=True, \
                                    offset_x=self.offset_x, offset_y=self.offset_y)