Пример #1
0
 def allocateBuffers(self, HCPU):
     host_buffer = np.zeros((self.ny, self.nx))
     self.eta = Common.CUDAArray2D(self.gpu_stream, self.nx, self.ny, 0, 0,
                                   host_buffer)
     self.hu = Common.CUDAArray2D(self.gpu_stream, self.nx, self.ny, 0, 0,
                                  host_buffer)
     self.hv = Common.CUDAArray2D(self.gpu_stream, self.nx, self.ny, 0, 0,
                                  host_buffer)
     self.H = Common.CUDAArray2D(self.gpu_stream, self.nx + 1, self.ny + 1,
                                 0, 0, HCPU)
     del host_buffer
Пример #2
0
    def _setupGPU(self):
        """
        Setting up kernel for reading observations, along with host and device buffers
        """
        # Create observation buffer!
        if self.observation_type == dautils.ObservationType.UnderlyingFlow or \
            self.observation_type == dautils.ObservationType.DirectUnderlyingFlow:

            zeros = np.zeros((self.driftersPerOceanModel, 2),
                             dtype=np.float32,
                             order='C')
            self.observation_buffer = Common.CUDAArray2D(self.gpu_stream, \
                                                         2, self.driftersPerOceanModel, 0, 0, \
                                                         zeros)

            # Generate kernels
            self.observation_kernels = self.gpu_ctx.get_kernel("observationKernels.cu", \
                                                             defines={})

            # Get CUDA functions and define data types for prepared_{async_}call()
            self.observeUnderlyingFlowKernel = self.observation_kernels.get_function(
                "observeUnderlyingFlow")
            self.observeUnderlyingFlowKernel.prepare("iiffiiPiPiPifiPiPi")

            self.local_size = (int(self.driftersPerOceanModel), 1, 1)
            self.global_size = (1, 1)
Пример #3
0
    def test_copy_buffer(self):
        clarray2 = Common.CUDAArray2D(self.gpu_stream, \
                                      self.nx, self.ny, self.nx_halo, self.ny_halo, \
                                      self.buf3)

        host_data_pre_copy = self.cudaarray.download(self.gpu_stream)
        self.assertEqual(host_data_pre_copy.tolist(), self.buf1.tolist())

        self.cudaarray.copyBuffer(self.gpu_stream, clarray2)
        host_data_post_copy = self.cudaarray.download(self.gpu_stream)
        self.assertEqual(host_data_post_copy.tolist(), self.buf3.tolist())

        self.tests_failed = False
Пример #4
0
    def __init__(self, gpu_ctx, numDrifters, \
                 observation_variance=0.01, \
                 boundaryConditions=Common.BoundaryConditions(), \
                 initialization_cov_drifters=None, \
                 domain_size_x=1.0, domain_size_y=1.0, \
                 gpu_stream=None, \
                 block_width = 64):
        
        super(GPUDrifterCollection, self).__init__(numDrifters,
                                observation_variance=observation_variance,
                                boundaryConditions=boundaryConditions,
                                domain_size_x=domain_size_x, 
                                domain_size_y=domain_size_y)
        
        # Define CUDA environment:
        self.gpu_ctx = gpu_ctx
        self.block_width = block_width
        self.block_height = 1
        
        # TODO: Where should the cl_queue come from?
        # For sure, the drifter and the ocean simulator should use 
        # the same queue...
        self.gpu_stream = gpu_stream
        if self.gpu_stream is None:
            self.gpu_stream = cuda.Stream()
                
        self.sensitivity = 1.0
         
        self.driftersHost = np.zeros((self.getNumDrifters() + 1, 2)).astype(np.float32, order='C')
        self.driftersDevice = Common.CUDAArray2D(self.gpu_stream, \
                                                 2, self.getNumDrifters()+1, 0, 0, \
                                                 self.driftersHost)
        
        self.drift_kernels = gpu_ctx.get_kernel("driftKernels.cu", \
                                                defines={'block_width': self.block_width, 'block_height': self.block_height})

        # Get CUDA functions and define data types for prepared_{async_}call()
        self.passiveDrifterKernel = self.drift_kernels.get_function("passiveDrifterKernel")
        self.passiveDrifterKernel.prepare("iifffiiPiPiPifiiiPif")
        self.enforceBoundaryConditionsKernel = self.drift_kernels.get_function("enforceBoundaryConditions")
        self.enforceBoundaryConditionsKernel.prepare("ffiiiPi")
        
        self.local_size = (self.block_width, self.block_height, 1)
        self.global_size = (\
                            int(np.ceil((self.getNumDrifters() + 2)/float(self.block_width))), \
                            1)
        
        # Initialize drifters:
        self.uniformly_distribute_drifters(initialization_cov_drifters=initialization_cov_drifters)
Пример #5
0
    def test_cross_precision_copy_buffer(self):
        self.init_double()

        single_cudaarray2 = Common.CUDAArray2D(self.gpu_stream, \
                                               self.nx, self.ny, \
                                               self.nx_halo, self.ny_halo, \
                                               self.buf3)

        host_data_pre_copy = self.double_cudaarray.download(self.gpu_stream)
        self.assertEqual(host_data_pre_copy.tolist(), self.dbuf1.tolist())

        with self.assertRaises(AssertionError):
            self.double_cudaarray.copyBuffer(self.gpu_stream,
                                             single_cudaarray2)

        self.tests_failed = False
Пример #6
0
    def test_double_copy_buffer(self):
        self.init_double()

        double_cudaarray2 = Common.CUDAArray2D(self.gpu_stream, \
                                               self.nx, self.ny, \
                                               self.nx_halo, self.ny_halo, \
                                               self.dbuf3, \
                                               double_precision=True)

        host_data_pre_copy = self.double_cudaarray.download(self.gpu_stream)
        self.assertEqual(host_data_pre_copy.tolist(), self.dbuf1.tolist())

        self.double_cudaarray.copyBuffer(self.gpu_stream, double_cudaarray2)
        host_data_post_copy = self.double_cudaarray.download(self.gpu_stream)
        self.assertEqual(host_data_post_copy.tolist(), self.dbuf3.tolist())

        self.tests_failed = False
Пример #7
0
    def setUp(self):

        #Set which CL device to use, and disable kernel caching
        self.gpu_ctx = Common.CUDAContext()

        # Make some host data which we can play with
        self.nx = 3
        self.ny = 5
        self.nx_halo = 1
        self.ny_halo = 2
        self.dataShape = (self.ny + 2 * self.ny_halo,
                          self.nx + 2 * self.nx_halo)

        self.buf1 = np.zeros(self.dataShape, dtype=np.float32, order='C')
        self.dbuf1 = np.zeros(self.dataShape)
        self.buf3 = np.zeros(self.dataShape, dtype=np.float32, order='C')
        self.dbuf3 = np.zeros(self.dataShape)
        for j in range(self.dataShape[0]):
            for i in range(self.dataShape[1]):
                self.buf1[j, i] = i * 100 + j
                self.dbuf1[j, i] = self.buf1[j, i]
                self.buf3[j, i] = j * 1000 - i
                self.dbuf3[j, i] = self.buf3[j, i]

        self.explicit_free = False

        self.device_name = self.gpu_ctx.cuda_device.name()
        self.gpu_stream = cuda.Stream()

        self.tests_failed = True

        self.cudaarray = Common.CUDAArray2D(self.gpu_stream, \
                                            self.nx, self.ny, \
                                            self.nx_halo, self.ny_halo, \
                                            self.buf1)

        self.double_cudaarray = None
Пример #8
0
    def __init__(self,
                 gpu_ctx,
                 gpu_stream,
                 nx,
                 ny,
                 dx,
                 dy,
                 boundaryConditions,
                 staggered,
                 soar_q0=None,
                 soar_L=None,
                 interpolation_factor=1,
                 use_lcg=False,
                 angle=np.array([[0]], dtype=np.float32),
                 coriolis_f=np.array([[0]], dtype=np.float32),
                 block_width=16,
                 block_height=16):
        """
        Initiates a class that generates small scale geostrophically balanced perturbations of
        the ocean state.
        (nx, ny): number of internal grid cells in the domain
        (dx, dy): size of each grid cell
        soar_q0: amplitude parameter for the perturbation, default: dx*1e-5
        soar_L: length scale of the perturbation covariance, default: 0.74*dx*interpolation_factor
        interpolation_factor: indicates that the perturbation of eta should be generated on a coarse mesh, 
            and then interpolated down to the computational mesh. The coarse mesh will then have
            (nx/interpolation_factor, ny/interpolation_factor) grid cells.
        use_lcg: LCG is a linear algorithm for generating a serie of pseudo-random numbers
        angle: Angle of rotation from North to y-axis as a texture (cuda.Array) or numpy array
        (block_width, block_height): The size of each GPU block
        """

        self.use_lcg = use_lcg

        # Set numpy random state
        self.random_state = np.random.RandomState()

        # Make sure that all variables initialized within ifs are defined
        self.random_numbers = None
        self.rng = None
        self.seed = None
        self.host_seed = None

        self.gpu_ctx = gpu_ctx
        self.gpu_stream = gpu_stream

        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        self.staggered = np.int(0)
        if staggered:
            self.staggered = np.int(1)

        # The cutoff parameter is hard-coded.
        # The size of the cutoff determines the computational radius in the
        # SOAR function. Hence, the size of the local memory in the OpenCL
        # kernels has to be hard-coded.
        self.cutoff = np.int32(config.soar_cutoff)

        # Check that the interpolation factor plays well with the grid size:
        assert (interpolation_factor > 0 and interpolation_factor % 2
                == 1), 'interpolation_factor must be a positive odd integer'

        assert (nx % interpolation_factor == 0
                ), 'nx must be divisible by the interpolation factor'
        assert (ny % interpolation_factor == 0
                ), 'ny must be divisible by the interpolation factor'
        self.interpolation_factor = np.int32(interpolation_factor)

        # The size of the coarse grid
        self.coarse_nx = np.int32(nx / self.interpolation_factor)
        self.coarse_ny = np.int32(ny / self.interpolation_factor)
        self.coarse_dx = np.float32(dx * self.interpolation_factor)
        self.coarse_dy = np.float32(dy * self.interpolation_factor)

        self.periodicNorthSouth = np.int32(
            boundaryConditions.isPeriodicNorthSouth())
        self.periodicEastWest = np.int32(
            boundaryConditions.isPeriodicEastWest())

        # Size of random field and seed
        # The SOAR function is a stencil which requires cutoff number of grid cells,
        # and the interpolation operator requires further 2 ghost cell values in each direction.
        # The random field must therefore be created with 2 + cutoff number of ghost cells.
        self.rand_ghost_cells_x = np.int32(2 + self.cutoff)
        self.rand_ghost_cells_y = np.int32(2 + self.cutoff)
        if self.periodicEastWest:
            self.rand_ghost_cells_x = np.int32(0)
        if self.periodicNorthSouth:
            self.rand_ghost_cells_y = np.int32(0)
        self.rand_nx = np.int32(self.coarse_nx + 2 * self.rand_ghost_cells_x)
        self.rand_ny = np.int32(self.coarse_ny + 2 * self.rand_ghost_cells_y)

        # Since normal distributed numbers are generated in pairs, we need to store half the number of
        # of seed values compared to the number of random numbers.
        self.seed_ny = np.int32(self.rand_ny)
        self.seed_nx = np.int32(np.ceil(self.rand_nx / 2))

        # Generate seed:
        self.floatMax = 2147483648.0
        if self.use_lcg:
            self.host_seed = self.random_state.rand(
                self.seed_ny, self.seed_nx) * self.floatMax
            self.host_seed = self.host_seed.astype(np.uint64, order='C')

        if not self.use_lcg:
            self.rng = XORWOWRandomNumberGenerator()
        else:
            self.seed = Common.CUDAArray2D(gpu_stream,
                                           self.seed_nx,
                                           self.seed_ny,
                                           0,
                                           0,
                                           self.host_seed,
                                           double_precision=True,
                                           integers=True)

        # Constants for the SOAR function:
        self.soar_q0 = np.float32(self.dx / 100000)
        if soar_q0 is not None:
            self.soar_q0 = np.float32(soar_q0)

        self.soar_L = np.float32(0.75 * self.coarse_dx)
        if soar_L is not None:
            self.soar_L = np.float32(soar_L)

        # Allocate memory for random numbers (xi)
        self.random_numbers_host = np.zeros((self.rand_ny, self.rand_nx),
                                            dtype=np.float32,
                                            order='C')
        self.random_numbers = Common.CUDAArray2D(self.gpu_stream, self.rand_nx,
                                                 self.rand_ny, 0, 0,
                                                 self.random_numbers_host)

        # Allocate a second buffer for random numbers (nu)
        self.perpendicular_random_numbers_host = np.zeros(
            (self.rand_ny, self.rand_nx), dtype=np.float32, order='C')
        self.perpendicular_random_numbers = Common.CUDAArray2D(
            self.gpu_stream, self.rand_nx, self.rand_ny, 0, 0,
            self.random_numbers_host)

        # Allocate memory for coarse buffer if needed
        # Two ghost cells in each direction needed for bicubic interpolation
        self.coarse_buffer_host = np.zeros(
            (self.coarse_ny + 4, self.coarse_nx + 4),
            dtype=np.float32,
            order='C')
        self.coarse_buffer = Common.CUDAArray2D(self.gpu_stream,
                                                self.coarse_nx, self.coarse_ny,
                                                2, 2, self.coarse_buffer_host)

        # Allocate extra memory needed for reduction kernels.
        # Currently: A single GPU buffer with 3x1 elements: [xi^T * xi, nu^T * nu, xi^T * nu]
        self.reduction_buffer = None
        reduction_buffer_host = np.zeros((1, 3), dtype=np.float32)
        self.reduction_buffer = Common.CUDAArray2D(self.gpu_stream, 3, 1, 0, 0,
                                                   reduction_buffer_host)

        # Generate kernels
        self.kernels = gpu_ctx.get_kernel("ocean_noise.cu", \
                                          defines={'block_width': block_width, 'block_height': block_height},
                                          compile_args={
                                              'options': ["--use_fast_math",
                                                          "--maxrregcount=32"]
                                          })

        self.reduction_kernels = self.gpu_ctx.get_kernel("reductions.cu", \
                                                         defines={})

        # Get CUDA functions and define data types for prepared_{async_}call()
        # Generate kernels
        self.squareSumKernel = self.reduction_kernels.get_function("squareSum")
        self.squareSumKernel.prepare("iiPP")

        self.squareSumDoubleKernel = self.reduction_kernels.get_function(
            "squareSumDouble")
        self.squareSumDoubleKernel.prepare("iiPPP")

        self.makePerpendicularKernel = self.kernels.get_function(
            "makePerpendicular")
        self.makePerpendicularKernel.prepare("iiPiPiP")

        self.uniformDistributionKernel = self.kernels.get_function(
            "uniformDistribution")
        self.uniformDistributionKernel.prepare("iiiPiPi")

        self.normalDistributionKernel = None
        if self.use_lcg:
            self.normalDistributionKernel = self.kernels.get_function(
                "normalDistribution")
            self.normalDistributionKernel.prepare("iiiPiPi")

        self.soarKernel = self.kernels.get_function("SOAR")
        self.soarKernel.prepare("iifffffiiPiPii")

        self.geostrophicBalanceKernel = self.kernels.get_function(
            "geostrophicBalance")
        self.geostrophicBalanceKernel.prepare("iiffiiffffPiPiPiPiPif")

        self.bicubicInterpolationKernel = self.kernels.get_function(
            "bicubicInterpolation")
        self.bicubicInterpolationKernel.prepare(
            "iiiiffiiiiffiiffffPiPiPiPiPif")

        #Compute kernel launch parameters
        self.local_size = (block_width, block_height, 1)

        self.local_size_reductions = (128, 1, 1)
        self.global_size_reductions = (1, 1)

        # Launch one thread for each seed, which in turns generates two iid N(0,1)
        self.global_size_random_numbers = ( \
                       int(np.ceil(self.seed_nx / float(self.local_size[0]))), \
                       int(np.ceil(self.seed_ny / float(self.local_size[1]))) \
                     )

        # Launch on thread for each random number (in order to create perpendicular random numbers)
        self.global_size_perpendicular = ( \
                      int(np.ceil(self.rand_nx / float(self.local_size[0]))), \
                      int(np.ceil(self.rand_ny / float(self.local_size[1]))) \
                     )

        # Launch one thread per SOAR-correlated result - need to write to two ghost
        # cells in order to do bicubic interpolation based on the result
        self.global_size_SOAR = ( \
                     int(np.ceil( (self.coarse_nx+4)/float(self.local_size[0]))), \
                     int(np.ceil( (self.coarse_ny+4)/float(self.local_size[1]))) \
                    )

        # One thread per resulting perturbed grid cell
        self.global_size_geo_balance = ( \
                    int(np.ceil( (self.nx)/float(self.local_size[0]))), \
                    int(np.ceil( (self.ny)/float(self.local_size[1]))) \
                   )

        # Texture for coriolis field
        self.coriolis_texref = self.kernels.get_texref("coriolis_f_tex")
        if isinstance(coriolis_f, cuda.Array):
            # coriolis_f is already a texture, so we just set the reference
            self.coriolis_texref.set_array(coriolis_f)
        else:
            #Upload data to GPU and bind to texture reference
            self.coriolis_texref.set_array(
                cuda.np_to_array(np.ascontiguousarray(coriolis_f,
                                                      dtype=np.float32),
                                 order="C"))

        # Set texture parameters
        self.coriolis_texref.set_filter_mode(
            cuda.filter_mode.LINEAR)  #bilinear interpolation
        self.coriolis_texref.set_address_mode(
            0, cuda.address_mode.CLAMP)  #no indexing outside domain
        self.coriolis_texref.set_address_mode(1, cuda.address_mode.CLAMP)
        self.coriolis_texref.set_flags(
            cuda.TRSF_NORMALIZED_COORDINATES)  #Use [0, 1] indexing
        # FIXME! Allow different versions of coriolis, similar to CDKLM

        # Texture for angle towards north
        self.angle_texref = self.kernels.get_texref("angle_tex")
        if isinstance(angle, cuda.Array):
            # angle is already a texture, so we just set the reference
            self.angle_texref.set_array(angle)
        else:
            #Upload data to GPU and bind to texture reference
            self.angle_texref.set_array(
                cuda.np_to_array(np.ascontiguousarray(angle, dtype=np.float32),
                                 order="C"))

        # Set texture parameters
        self.angle_texref.set_filter_mode(
            cuda.filter_mode.LINEAR)  #bilinear interpolation
        self.angle_texref.set_address_mode(
            0, cuda.address_mode.CLAMP)  #no indexing outside domain
        self.angle_texref.set_address_mode(1, cuda.address_mode.CLAMP)
        self.angle_texref.set_flags(
            cuda.TRSF_NORMALIZED_COORDINATES)  #Use [0, 1] indexing
Пример #9
0
    def __init__(self, \
                 gpu_ctx, \
                 eta0, hu0, hv0, H, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, f, r, \
                 angle=np.array([[0]], dtype=np.float32), \
                 t=0.0, \
                 theta=1.3, rk_order=2, \
                 coriolis_beta=0.0, \
                 max_wind_direction_perturbation = 0, \
                 wind_stress=WindStress.WindStress(), \
                 boundary_conditions=Common.BoundaryConditions(), \
                 boundary_conditions_data=Common.BoundaryConditionsData(), \
                 small_scale_perturbation=False, \
                 small_scale_perturbation_amplitude=None, \
                 small_scale_perturbation_interpolation_factor = 1, \
                 model_time_step=None,
                 reportGeostrophicEquilibrium=False, \
                 use_lcg=False, \
                 write_netcdf=False, \
                 comm=None, \
                 netcdf_filename=None, \
                 ignore_ghostcells=False, \
                 courant_number=0.8, \
                 offset_x=0, offset_y=0, \
                 flux_slope_eps = 1.0e-1, \
                 desingularization_eps = 1.0e-1, \
                 depth_cutoff = 1.0e-5, \
                 block_width=32, block_height=8, num_threads_dt=256,
                 block_width_model_error=16, block_height_model_error=16):
        """
        Initialization routine
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        H: Depth from equilibrium defined on cell corners, (nx+5)*(ny+5) corners
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        angle: Angle of rotation from North to y-axis
        t: Start simulation at time t
        theta: MINMOD theta used the reconstructions of the derivatives in the numerical scheme
        rk_order: Order of Runge Kutta method {1,2*,3}
        coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
        max_wind_direction_perturbation: Large-scale model error emulation by per-time-step perturbation of wind direction by +/- max_wind_direction_perturbation (degrees)
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        small_scale_perturbation: Boolean value for applying a stochastic model error
        small_scale_perturbation_amplitude: Amplitude (q0 coefficient) for model error
        small_scale_perturbation_interpolation_factor: Width factor for correlation in model error
        model_time_step: The size of a data assimilation model step (default same as dt)
        reportGeostrophicEquilibrium: Calculate the Geostrophic Equilibrium variables for each superstep
        use_lcg: Use LCG as the random number generator. Default is False, which means using curand.
        write_netcdf: Write the results after each superstep to a netCDF file
        comm: MPI communicator
        desingularization_eps: Used for desingularizing hu/h
        flux_slope_eps: Used for setting zero flux for symmetric Riemann fan
        depth_cutoff: Used for defining dry cells
        netcdf_filename: Use this filename. (If not defined, a filename will be generated by SimWriter.)
        """
               
        self.logger = logging.getLogger(__name__)

        assert( rk_order < 4 or rk_order > 0 ), "Only 1st, 2nd and 3rd order Runge Kutta supported"

        if (rk_order == 3):
            assert(r == 0.0), "3rd order Runge Kutta supported only without friction"
        
        # Sort out internally represented ghost_cells in the presence of given
        # boundary conditions
        ghost_cells_x = 2
        ghost_cells_y = 2
        
        #Coriolis at "first" cell
        x_zero_reference_cell = ghost_cells_x
        y_zero_reference_cell = ghost_cells_y # In order to pass it to the super constructor
        
        # Boundary conditions
        self.boundary_conditions = boundary_conditions
        if (boundary_conditions.isSponge()):
            nx = nx + boundary_conditions.spongeCells[1] + boundary_conditions.spongeCells[3] - 2*ghost_cells_x
            ny = ny + boundary_conditions.spongeCells[0] + boundary_conditions.spongeCells[2] - 2*ghost_cells_y
            
            x_zero_reference_cell += boundary_conditions.spongeCells[3]
            y_zero_reference_cell += boundary_conditions.spongeCells[2]

        #Compensate f for reference cell (first cell in internal of domain)
        north = np.array([np.sin(angle[0,0]), np.cos(angle[0,0])])
        f = f - coriolis_beta * (x_zero_reference_cell*dx*north[0] + y_zero_reference_cell*dy*north[1])
        
        x_zero_reference_cell = 0
        y_zero_reference_cell = 0
        
        A = None
        self.max_wind_direction_perturbation = max_wind_direction_perturbation
        super(CDKLM16, self).__init__(gpu_ctx, \
                                      nx, ny, \
                                      ghost_cells_x, \
                                      ghost_cells_y, \
                                      dx, dy, dt, \
                                      g, f, r, A, \
                                      t, \
                                      theta, rk_order, \
                                      coriolis_beta, \
                                      y_zero_reference_cell, \
                                      wind_stress, \
                                      write_netcdf, \
                                      ignore_ghostcells, \
                                      offset_x, offset_y, \
                                      comm, \
                                      block_width, block_height)
        
        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([-2,-2,2,2])
        self._set_interior_domain_from_sponge_cells()
        
        defines={'block_width': block_width, 'block_height': block_height,
                   'KPSIMULATOR_DESING_EPS': str(desingularization_eps)+'f',
                   'KPSIMULATOR_FLUX_SLOPE_EPS': str(flux_slope_eps)+'f',
                   'KPSIMULATOR_DEPTH_CUTOFF': str(depth_cutoff)+'f'}
        
        #Get kernels
        self.kernel = gpu_ctx.get_kernel("CDKLM16_kernel.cu", 
                defines=defines, 
                compile_args={                          # default, fast_math, optimal
                    'options' : ["--ftz=true",          # false,   true,      true
                                 "--prec-div=false",    # true,    false,     false,
                                 "--prec-sqrt=false",   # true,    false,     false
                                 "--fmad=false"]        # true,    true,      false
                    
                    #'options': ["--use_fast_math"]
                    #'options': ["--generate-line-info"], 
                    #nvcc_options=["--maxrregcount=39"],
                    #'arch': "compute_50", 
                    #'code': "sm_50"
                },
                jit_compile_args={
                    #jit_options=[(cuda.jit_option.MAX_REGISTERS, 39)]
                }
                )
        
        # Get CUDA functions and define data types for prepared_{async_}call()
        self.cdklm_swe_2D = self.kernel.get_function("cdklm_swe_2D")
        self.cdklm_swe_2D.prepare("iiffffffffiiPiPiPiPiPiPiPiPiffi")
        self.update_wind_stress(self.kernel, self.cdklm_swe_2D)
        
        # CUDA functions for finding max time step size:
        self.num_threads_dt = num_threads_dt
        self.num_blocks_dt  = np.int32(self.global_size[0]*self.global_size[1])
        self.update_dt_kernels = gpu_ctx.get_kernel("max_dt.cu",
                defines={'block_width': block_width, 
                         'block_height': block_height,
                         'NUM_THREADS': self.num_threads_dt})
        self.per_block_max_dt_kernel = self.update_dt_kernels.get_function("per_block_max_dt")
        self.per_block_max_dt_kernel.prepare("iifffPiPiPiPifPi")
        self.max_dt_reduction_kernel = self.update_dt_kernels.get_function("max_dt_reduction")
        self.max_dt_reduction_kernel.prepare("iPP")
        
            
        # Bathymetry
        self.bathymetry = Common.Bathymetry(gpu_ctx, self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, H, boundary_conditions)
                
        # Adjust eta for possible dry states
        Hm = self.downloadBathymetry()[1]
        eta0 = np.maximum(eta0, -Hm)
        
        # Create data by uploading to device
        self.gpu_data = Common.SWEDataArakawaA(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, eta0, hu0, hv0)

        # Allocate memory for calculating maximum timestep
        host_dt = np.zeros((self.global_size[1], self.global_size[0]), dtype=np.float32)
        self.device_dt = Common.CUDAArray2D(self.gpu_stream, self.global_size[0], self.global_size[1],
                                            0, 0, host_dt)
        host_max_dt_buffer = np.zeros((1,1), dtype=np.float32)
        self.max_dt_buffer = Common.CUDAArray2D(self.gpu_stream, 1, 1, 0, 0, host_max_dt_buffer)
        self.courant_number = courant_number
        
        ## Allocating memory for geostrophical equilibrium variables
        self.reportGeostrophicEquilibrium = np.int32(reportGeostrophicEquilibrium)
        self.geoEq_uxpvy = None
        self.geoEq_Kx = None
        self.geoEq_Ly = None
        if self.reportGeostrophicEquilibrium:
            dummy_zero_array = np.zeros((ny+2*ghost_cells_y, nx+2*ghost_cells_x), dtype=np.float32, order='C') 
            self.geoEq_uxpvy = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, dummy_zero_array)
            self.geoEq_Kx = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, dummy_zero_array)
            self.geoEq_Ly = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, dummy_zero_array)

        self.constant_equilibrium_depth = np.max(H)
        
        self.bc_kernel = Common.BoundaryConditionsArakawaA(gpu_ctx, \
                                                           self.nx, \
                                                           self.ny, \
                                                           ghost_cells_x, \
                                                           ghost_cells_y, \
                                                           self.boundary_conditions, \
                                                           boundary_conditions_data, \
        )

        # Small scale perturbation:
        self.small_scale_perturbation = small_scale_perturbation
        self.small_scale_model_error = None
        self.small_scale_perturbation_interpolation_factor = small_scale_perturbation_interpolation_factor
        if small_scale_perturbation:
            if small_scale_perturbation_amplitude is None:
                self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(self,
                                                                                       interpolation_factor=small_scale_perturbation_interpolation_factor,
                                                                                       use_lcg=use_lcg,
                                                                                       block_width=block_width_model_error, 
                                                                                       block_height=block_height_model_error)
            else:
                self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(self, 
                                                                                       soar_q0=small_scale_perturbation_amplitude,
                                                                                       interpolation_factor=small_scale_perturbation_interpolation_factor,
                                                                                       use_lcg=use_lcg,
                                                                                       block_width=block_width_model_error, 
                                                                                       block_height=block_height_model_error)
        
        
        # Data assimilation model step size
        self.model_time_step = model_time_step
        if model_time_step is None:
            self.model_time_step = self.dt
        self.total_time_steps = 0
        
        
        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(self, filename=netcdf_filename, ignore_ghostcells=self.ignore_ghostcells, \
                                    offset_x=self.offset_x, offset_y=self.offset_y)
                                    
                                    
        #Upload data to GPU and bind to texture reference
        self.angle_texref = self.kernel.get_texref("angle_tex")
        self.angle_texref.set_array(cuda.np_to_array(np.ascontiguousarray(angle, dtype=np.float32), order="C"))
                    
        # Set texture parameters
        self.angle_texref.set_filter_mode(cuda.filter_mode.LINEAR) #bilinear interpolation
        self.angle_texref.set_address_mode(0, cuda.address_mode.CLAMP) #no indexing outside domain
        self.angle_texref.set_address_mode(1, cuda.address_mode.CLAMP)
        self.angle_texref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES) #Use [0, 1] indexing
Пример #10
0
    def __init__(self, \
                 gpu_ctx, \
                 eta0, hu0, hv0, H, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, f, r, \
                 subsample_f=10, \
                 angle=np.array([[0]], dtype=np.float32), \
                 subsample_angle=10, \
                 latitude=None, \
                 t=0.0, \
                 theta=1.3, rk_order=2, \
                 coriolis_beta=0.0, \
                 max_wind_direction_perturbation = 0, \
                 wind_stress=WindStress.WindStress(), \
                 boundary_conditions=Common.BoundaryConditions(), \
                 boundary_conditions_data=Common.BoundaryConditionsData(), \
                 small_scale_perturbation=False, \
                 small_scale_perturbation_amplitude=None, \
                 small_scale_perturbation_interpolation_factor = 1, \
                 model_time_step=None,
                 reportGeostrophicEquilibrium=False, \
                 use_lcg=False, \
                 write_netcdf=False, \
                 comm=None, \
                 local_particle_id=0, \
                 super_dir_name=None, \
                 netcdf_filename=None, \
                 ignore_ghostcells=False, \
                 courant_number=0.8, \
                 offset_x=0, offset_y=0, \
                 flux_slope_eps = 1.0e-1, \
                 desingularization_eps = 1.0e-1, \
                 depth_cutoff = 1.0e-5, \
                 block_width=12, block_height=32, num_threads_dt=256,
                 block_width_model_error=16, block_height_model_error=16):
        """
        Initialization routine
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        H: Depth from equilibrium defined on cell corners, (nx+5)*(ny+5) corners
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        subsample_f: Subsample the coriolis f when creating texture by factor
        angle: Angle of rotation from North to y-axis as a texture (cuda.Array) or numpy array (in radians)
        subsample_angle: Subsample the angles given as input when creating texture by factor
        latitude: Specify latitude. This will override any f and beta plane already set (in radians)
        t: Start simulation at time t
        theta: MINMOD theta used the reconstructions of the derivatives in the numerical scheme
        rk_order: Order of Runge Kutta method {1,2*,3}
        coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
        max_wind_direction_perturbation: Large-scale model error emulation by per-time-step perturbation of wind direction by +/- max_wind_direction_perturbation (degrees)
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        small_scale_perturbation: Boolean value for applying a stochastic model error
        small_scale_perturbation_amplitude: Amplitude (q0 coefficient) for model error
        small_scale_perturbation_interpolation_factor: Width factor for correlation in model error
        model_time_step: The size of a data assimilation model step (default same as dt)
        reportGeostrophicEquilibrium: Calculate the Geostrophic Equilibrium variables for each superstep
        use_lcg: Use LCG as the random number generator. Default is False, which means using curand.
        write_netcdf: Write the results after each superstep to a netCDF file
        comm: MPI communicator
        local_particle_id: Local (for each MPI process) particle id
        desingularization_eps: Used for desingularizing hu/h
        flux_slope_eps: Used for setting zero flux for symmetric Riemann fan
        depth_cutoff: Used for defining dry cells
        super_dir_name: Directory to write netcdf files to
        netcdf_filename: Use this filename. (If not defined, a filename will be generated by SimWriter.)
        """

        self.logger = logging.getLogger(__name__)

        assert (rk_order < 4 or rk_order > 0
                ), "Only 1st, 2nd and 3rd order Runge Kutta supported"

        if (rk_order == 3):
            assert (r == 0.0
                    ), "3rd order Runge Kutta supported only without friction"

        # Sort out internally represented ghost_cells in the presence of given
        # boundary conditions
        ghost_cells_x = 2
        ghost_cells_y = 2

        #Coriolis at "first" cell
        x_zero_reference_cell = ghost_cells_x
        y_zero_reference_cell = ghost_cells_y  # In order to pass it to the super constructor

        # Boundary conditions
        self.boundary_conditions = boundary_conditions

        #Compensate f for reference cell (first cell in internal of domain)
        north = np.array([np.sin(angle[0, 0]), np.cos(angle[0, 0])])
        f = f - coriolis_beta * (x_zero_reference_cell * dx * north[0] +
                                 y_zero_reference_cell * dy * north[1])

        x_zero_reference_cell = 0
        y_zero_reference_cell = 0

        A = None
        self.max_wind_direction_perturbation = max_wind_direction_perturbation
        super(CDKLM16, self).__init__(gpu_ctx, \
                                      nx, ny, \
                                      ghost_cells_x, \
                                      ghost_cells_y, \
                                      dx, dy, dt, \
                                      g, f, r, A, \
                                      t, \
                                      theta, rk_order, \
                                      coriolis_beta, \
                                      y_zero_reference_cell, \
                                      wind_stress, \
                                      write_netcdf, \
                                      ignore_ghostcells, \
                                      offset_x, offset_y, \
                                      comm, \
                                      block_width, block_height,
                                      local_particle_id=local_particle_id)

        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([-2, -2, 2, 2])

        defines = {
            'block_width': block_width,
            'block_height': block_height,
            'KPSIMULATOR_DESING_EPS': "{:.12f}f".format(desingularization_eps),
            'KPSIMULATOR_FLUX_SLOPE_EPS': "{:.12f}f".format(flux_slope_eps),
            'KPSIMULATOR_DEPTH_CUTOFF': "{:.12f}f".format(depth_cutoff),
            'THETA': "{:.12f}f".format(self.theta),
            'RK_ORDER': int(self.rk_order),
            'NX': int(self.nx),
            'NY': int(self.ny),
            'DX': "{:.12f}f".format(self.dx),
            'DY': "{:.12f}f".format(self.dy),
            'GRAV': "{:.12f}f".format(self.g),
            'FRIC': "{:.12f}f".format(self.r)
        }

        #Get kernels
        self.kernel = gpu_ctx.get_kernel(
            "CDKLM16_kernel.cu",
            defines=defines,
            compile_args={  # default, fast_math, optimal
                'options': [
                    "--ftz=true",  # false,   true,      true
                    "--prec-div=false",  # true,    false,     false,
                    "--prec-sqrt=false",  # true,    false,     false
                    "--fmad=false"
                ]  # true,    true,      false

                #'options': ["--use_fast_math"]
                #'options': ["--generate-line-info"],
                #nvcc_options=["--maxrregcount=39"],
                #'arch': "compute_50",
                #'code': "sm_50"
            },
            jit_compile_args={
                #jit_options=[(cuda.jit_option.MAX_REGISTERS, 39)]
            })

        # Get CUDA functions and define data types for prepared_{async_}call()
        self.cdklm_swe_2D = self.kernel.get_function("cdklm_swe_2D")
        self.cdklm_swe_2D.prepare("fiPiPiPiPiPiPiPiPiffi")
        self.update_wind_stress(self.kernel, self.cdklm_swe_2D)

        # CUDA functions for finding max time step size:
        self.num_threads_dt = num_threads_dt
        self.num_blocks_dt = np.int32(self.global_size[0] *
                                      self.global_size[1])
        self.update_dt_kernels = gpu_ctx.get_kernel("max_dt.cu",
                                                    defines={
                                                        'block_width':
                                                        block_width,
                                                        'block_height':
                                                        block_height,
                                                        'NUM_THREADS':
                                                        self.num_threads_dt
                                                    })
        self.per_block_max_dt_kernel = self.update_dt_kernels.get_function(
            "per_block_max_dt")
        self.per_block_max_dt_kernel.prepare("iifffPiPiPiPifPi")
        self.max_dt_reduction_kernel = self.update_dt_kernels.get_function(
            "max_dt_reduction")
        self.max_dt_reduction_kernel.prepare("iPP")

        # Bathymetry
        self.bathymetry = Common.Bathymetry(gpu_ctx, self.gpu_stream, nx, ny,
                                            ghost_cells_x, ghost_cells_y, H,
                                            boundary_conditions)

        # Adjust eta for possible dry states
        Hm = self.downloadBathymetry()[1]
        eta0 = np.maximum(eta0, -Hm)

        # Create data by uploading to device
        self.gpu_data = Common.SWEDataArakawaA(self.gpu_stream, nx, ny,
                                               ghost_cells_x, ghost_cells_y,
                                               eta0, hu0, hv0)

        # Allocate memory for calculating maximum timestep
        host_dt = np.zeros((self.global_size[1], self.global_size[0]),
                           dtype=np.float32)
        self.device_dt = Common.CUDAArray2D(self.gpu_stream,
                                            self.global_size[0],
                                            self.global_size[1], 0, 0, host_dt)
        host_max_dt_buffer = np.zeros((1, 1), dtype=np.float32)
        self.max_dt_buffer = Common.CUDAArray2D(self.gpu_stream, 1, 1, 0, 0,
                                                host_max_dt_buffer)
        self.courant_number = courant_number

        ## Allocating memory for geostrophical equilibrium variables
        self.reportGeostrophicEquilibrium = np.int32(
            reportGeostrophicEquilibrium)
        self.geoEq_uxpvy = None
        self.geoEq_Kx = None
        self.geoEq_Ly = None
        if self.reportGeostrophicEquilibrium:
            dummy_zero_array = np.zeros(
                (ny + 2 * ghost_cells_y, nx + 2 * ghost_cells_x),
                dtype=np.float32,
                order='C')
            self.geoEq_uxpvy = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                                  ghost_cells_x, ghost_cells_y,
                                                  dummy_zero_array)
            self.geoEq_Kx = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                               ghost_cells_x, ghost_cells_y,
                                               dummy_zero_array)
            self.geoEq_Ly = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                               ghost_cells_x, ghost_cells_y,
                                               dummy_zero_array)

        self.constant_equilibrium_depth = np.max(H)

        self.bc_kernel = Common.BoundaryConditionsArakawaA(gpu_ctx, \
                                                           self.nx, \
                                                           self.ny, \
                                                           ghost_cells_x, \
                                                           ghost_cells_y, \
                                                           self.boundary_conditions, \
                                                           boundary_conditions_data, \
        )

        def subsample_texture(data, factor):
            ny, nx = data.shape
            dx, dy = 1 / nx, 1 / ny
            I = interp2d(np.linspace(0.5 * dx, 1 - 0.5 * dx, nx),
                         np.linspace(0.5 * dy, 1 - 0.5 * dy, ny),
                         data,
                         kind='linear')

            new_nx, new_ny = max(2, nx // factor), max(2, ny // factor)
            new_dx, new_dy = 1 / new_nx, 1 / new_ny
            x_new = np.linspace(0.5 * new_dx, 1 - 0.5 * new_dx, new_nx)
            y_new = np.linspace(0.5 * new_dy, 1 - 0.5 * new_dy, new_ny)
            return I(x_new, y_new)

        # Texture for angle
        self.angle_texref = self.kernel.get_texref("angle_tex")
        if isinstance(angle, cuda.Array):
            # angle is already a texture, so we just set the texture reference
            self.angle_texref.set_array(angle)
        else:
            #Upload data to GPU and bind to texture reference
            if (subsample_angle and angle.size >= eta0.size):
                self.logger.info("Subsampling angle texture by factor " +
                                 str(subsample_angle))
                self.logger.warning(
                    "This will give inaccurate angle along the border!")
                angle = subsample_texture(angle, subsample_angle)

            self.angle_texref.set_array(
                cuda.np_to_array(np.ascontiguousarray(angle, dtype=np.float32),
                                 order="C"))

        # Set texture parameters
        self.angle_texref.set_filter_mode(
            cuda.filter_mode.LINEAR)  #bilinear interpolation
        self.angle_texref.set_address_mode(
            0, cuda.address_mode.CLAMP)  #no indexing outside domain
        self.angle_texref.set_address_mode(1, cuda.address_mode.CLAMP)
        self.angle_texref.set_flags(
            cuda.TRSF_NORMALIZED_COORDINATES)  #Use [0, 1] indexing

        # Texture for coriolis f
        self.coriolis_texref = self.kernel.get_texref("coriolis_f_tex")

        # Create the CPU coriolis
        if (latitude is not None):
            if (self.f != 0.0):
                raise RuntimeError(
                    "Cannot specify both latitude and f. Make your mind up.")
            coriolis_f, _ = OceanographicUtilities.calcCoriolisParams(latitude)
            coriolis_f = coriolis_f.astype(np.float32)
        else:
            if (self.coriolis_beta != 0.0):
                if (angle.size != 1):
                    raise RuntimeError(
                        "non-constant angle cannot be combined with beta plane model (makes no sense)"
                    )
                #Generate coordinates for all cells, including ghost cells from center to center
                # [-3/2dx, nx+3/2dx] for ghost_cells_x == 2
                x = np.linspace((-self.ghost_cells_x + 0.5) * self.dx,
                                (self.nx + self.ghost_cells_x - 0.5) * self.dx,
                                self.nx + 2 * self.ghost_cells_x)
                y = np.linspace((-self.ghost_cells_y + 0.5) * self.dy,
                                (self.ny + self.ghost_cells_y - 0.5) * self.dy,
                                self.ny + 2 * self.ghost_cells_x)
                self.logger.info(
                    "Using latitude to create Coriolis f texture ({:f}x{:f} cells)"
                    .format(x.size, y.size))
                x, y = np.meshgrid(x, y)
                n = x * np.sin(angle[0, 0]) + y * np.cos(
                    angle[0, 0])  #North vector
                coriolis_f = self.f + self.coriolis_beta * n
            else:
                if (self.f.size == 1):
                    coriolis_f = np.array([[self.f]], dtype=np.float32)
                elif (self.f.shape == eta0.shape):
                    coriolis_f = np.array(self.f, dtype=np.float32)
                else:
                    raise RuntimeError(
                        "The shape of f should match up with eta or be scalar."
                    )

        if (subsample_f and coriolis_f.size >= eta0.size):
            self.logger.info("Subsampling coriolis texture by factor " +
                             str(subsample_f))
            self.logger.warning(
                "This will give inaccurate coriolis along the border!")
            coriolis_f = subsample_texture(coriolis_f, subsample_f)

        #Upload data to GPU and bind to texture reference
        self.coriolis_texref.set_array(
            cuda.np_to_array(np.ascontiguousarray(coriolis_f,
                                                  dtype=np.float32),
                             order="C"))

        # Set texture parameters
        self.coriolis_texref.set_filter_mode(
            cuda.filter_mode.LINEAR)  #bilinear interpolation
        self.coriolis_texref.set_address_mode(
            0, cuda.address_mode.CLAMP)  #no indexing outside domain
        self.coriolis_texref.set_address_mode(1, cuda.address_mode.CLAMP)
        self.coriolis_texref.set_flags(
            cuda.TRSF_NORMALIZED_COORDINATES)  #Use [0, 1] indexing

        # Small scale perturbation:
        self.small_scale_perturbation = small_scale_perturbation
        self.small_scale_model_error = None
        self.small_scale_perturbation_interpolation_factor = small_scale_perturbation_interpolation_factor
        if small_scale_perturbation:
            self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(
                self,
                soar_q0=small_scale_perturbation_amplitude,
                interpolation_factor=
                small_scale_perturbation_interpolation_factor,
                use_lcg=use_lcg,
                block_width=block_width_model_error,
                block_height=block_height_model_error)

        # Data assimilation model step size
        self.model_time_step = model_time_step
        self.total_time_steps = 0
        if model_time_step is None:
            self.model_time_step = self.dt

        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(self, super_dir_name=super_dir_name, filename=netcdf_filename, \
                                            ignore_ghostcells=self.ignore_ghostcells, offset_x=self.offset_x, offset_y=self.offset_y)

        # Update timestep if dt is given as zero
        if self.dt <= 0:
            self.updateDt()
Пример #11
0
    def __init__(self, \
                 gpu_ctx, \
                 H, eta0, hu0, hv0, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, f, r, \
                 t=0.0, \
                 coriolis_beta=0.0, \
                 y_zero_reference_cell = 0, \
                 wind_stress=WindStress.WindStress(), \
                 boundary_conditions=Common.BoundaryConditions(), \
                 write_netcdf=False, \
                 ignore_ghostcells=False, \
                 offset_x=0, offset_y=0, \
                 block_width=16, block_height=16):
        """
        Initialization routine
        H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        coriolis_beta: Coriolis linear factor -> f = f + beta*y
        y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell .
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        write_netcdf: Write the results after each superstep to a netCDF file
        """

        #Create data by uploading to device
        ghost_cells_x = 0
        ghost_cells_y = 0
        y_zero_reference_cell = y_zero_reference_cell
        self.asym_ghost_cells = [0, 0, 0, 0]  # [N, E, S, W]

        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([None, None, 0, 0])

        self.boundary_conditions = boundary_conditions
        # Add asym ghost cell if periodic boundary condition:
        if (self.boundary_conditions.north == 2) or \
           (self.boundary_conditions.south == 2):
            self.asym_ghost_cells[0] = 1
            self.interior_domain_indices[0] = -1
        if (self.boundary_conditions.east == 2) or \
           (self.boundary_conditions.west == 2):
            self.asym_ghost_cells[1] = 1
            self.interior_domain_indices[1] = -1

        if boundary_conditions.isSponge():
            nx = nx + boundary_conditions.spongeCells[
                1] + boundary_conditions.spongeCells[
                    3]  # - self.asym_ghost_cells[1] - self.asym_ghost_cells[3]
            ny = ny + boundary_conditions.spongeCells[
                0] + boundary_conditions.spongeCells[
                    2]  # - self.asym_ghost_cells[0] - self.asym_ghost_cells[2]
            y_zero_reference_cell = y_zero_reference_cell + boundary_conditions.spongeCells[
                2]

        rk_order = None
        theta = None
        A = None
        super(FBL, self).__init__(gpu_ctx, \
                                  nx, ny, \
                                  ghost_cells_x, \
                                  ghost_cells_y, \
                                  dx, dy, dt, \
                                  g, f, r, A, \
                                  t, \
                                  theta, rk_order, \
                                  coriolis_beta, \
                                  y_zero_reference_cell, \
                                  wind_stress, \
                                  write_netcdf, \
                                  ignore_ghostcells, \
                                  offset_x, offset_y, \
                                  block_width, block_height)
        self._set_interior_domain_from_sponge_cells()

        #Get kernels
        self.u_kernel = gpu_ctx.get_kernel("FBL_U_kernel.cu",
                                           defines={
                                               'block_width': block_width,
                                               'block_height': block_height
                                           })
        self.v_kernel = gpu_ctx.get_kernel("FBL_V_kernel.cu",
                                           defines={
                                               'block_width': block_width,
                                               'block_height': block_height
                                           })
        self.eta_kernel = gpu_ctx.get_kernel("FBL_eta_kernel.cu",
                                             defines={
                                                 'block_width': block_width,
                                                 'block_height': block_height
                                             })

        # Get CUDA functions
        self.computeUKernel = self.u_kernel.get_function("computeUKernel")
        self.computeVKernel = self.v_kernel.get_function("computeVKernel")
        self.computeEtaKernel = self.eta_kernel.get_function(
            "computeEtaKernel")

        # Prepare kernel lauches
        self.computeUKernel.prepare("iiffffffffPiPiPiPif")
        self.computeVKernel.prepare("iiffffffffPiPiPiPif")
        self.computeEtaKernel.prepare("iiffffffffPiPiPiPi")

        # Set up textures
        self.update_wind_stress(self.u_kernel, self.computeUKernel)
        self.update_wind_stress(self.v_kernel, self.computeVKernel)

        self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x,
                                    ghost_cells_y, H, self.asym_ghost_cells)
        self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny,
                                               ghost_cells_x, ghost_cells_y,
                                               eta0, hu0, hv0,
                                               self.asym_ghost_cells)

        # Overwrite halo with asymetric ghost cells
        self.nx_halo = np.int32(nx + self.asym_ghost_cells[1] +
                                self.asym_ghost_cells[3])
        self.ny_halo = np.int32(ny + self.asym_ghost_cells[0] +
                                self.asym_ghost_cells[2])

        self.bc_kernel = FBL_periodic_boundary(self.gpu_ctx, \
                                               self.nx, \
                                               self.ny, \
                                               self.boundary_conditions, \
                                               self.asym_ghost_cells
        )

        self.totalNumIterations = 0
        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(self, ignore_ghostcells=self.ignore_ghostcells, \
                                    staggered_grid=True, offset_x=self.offset_x, offset_y=self.offset_y)
Пример #12
0
    def __init__(self, \
                 gpu_ctx, \
                 H, eta0, hu0, hv0, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, f, r, A=0.0, \
                 t=0.0, \
                 coriolis_beta=0.0, \
                 y_zero_reference_cell = 0, \
                 wind_stress=WindStress.WindStress(), \
                 boundary_conditions=Common.BoundaryConditions(), \
                 write_netcdf=False, \
                 ignore_ghostcells=False, \
                 offset_x=0, offset_y=0, \
                 block_width=16, block_height=16):
        """
        Initialization routine
        H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        A: Eddy viscosity coefficient (O(dx))
        t: Start simulation at time t
        coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
        y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell .
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        write_netcdf: Write the results after each superstep to a netCDF file
        """

        # Sort out internally represented ghost_cells in the presence of given
        # boundary conditions
        halo_x = 1
        halo_y = 1
        ghost_cells_x = 1
        ghost_cells_y = 1
        y_zero_reference_cell = y_zero_reference_cell + 1

        self.boundary_conditions = boundary_conditions
        if boundary_conditions.isSponge():
            nx = nx + boundary_conditions.spongeCells[
                1] + boundary_conditions.spongeCells[3] - 2 * ghost_cells_x
            ny = ny + boundary_conditions.spongeCells[
                0] + boundary_conditions.spongeCells[2] - 2 * ghost_cells_y
            y_zero_reference_cell = y_zero_reference_cell + boundary_conditions.spongeCells[
                2]

        # self.<parameters> are sat in parent constructor:
        rk_order = None
        theta = None
        super(CTCS, self).__init__(gpu_ctx, \
                                   nx, ny, \
                                   ghost_cells_x, \
                                   ghost_cells_y, \
                                   dx, dy, dt, \
                                   g, f, r, A, \
                                   t, \
                                   theta, rk_order, \
                                   coriolis_beta, \
                                   y_zero_reference_cell, \
                                   wind_stress, \
                                   write_netcdf, \
                                   ignore_ghostcells, \
                                   offset_x, offset_y, \
                                   block_width, block_height)

        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([-1, -1, 1, 1])
        self._set_interior_domain_from_sponge_cells()

        #Get kernels
        self.u_kernel = gpu_ctx.get_kernel("CTCS_U_kernel.cu",
                                           defines={
                                               'block_width': block_width,
                                               'block_height': block_height
                                           })
        self.v_kernel = gpu_ctx.get_kernel("CTCS_V_kernel.cu",
                                           defines={
                                               'block_width': block_width,
                                               'block_height': block_height
                                           })
        self.eta_kernel = gpu_ctx.get_kernel("CTCS_eta_kernel.cu",
                                             defines={
                                                 'block_width': block_width,
                                                 'block_height': block_height
                                             })

        # Get CUDA functions
        self.computeUKernel = self.u_kernel.get_function("computeUKernel")
        self.computeVKernel = self.v_kernel.get_function("computeVKernel")
        self.computeEtaKernel = self.eta_kernel.get_function(
            "computeEtaKernel")

        # Prepare kernel lauches
        self.computeUKernel.prepare("iiiifffffffffPiPiPiPiPif")
        self.computeVKernel.prepare("iiiifffffffffPiPiPiPiPif")
        self.computeEtaKernel.prepare("iiffffffffPiPiPi")

        # Set up textures
        self.update_wind_stress(self.u_kernel, self.computeUKernel)
        self.update_wind_stress(self.v_kernel, self.computeVKernel)

        #Create data by uploading to device
        self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny, halo_x, halo_y, H)
        self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny, halo_x,
                                               halo_y, eta0, hu0, hv0)

        # Global size needs to be larger than the default from parent.__init__
        self.global_size = ( \
                       int(np.ceil((self.nx+2*halo_x) / float(self.local_size[0]))), \
                       int(np.ceil((self.ny+2*halo_y) / float(self.local_size[1]))) \
                      )

        self.bc_kernel = CTCS_boundary_condition(gpu_ctx, \
                                                 self.nx, \
                                                 self.ny, \
                                                 self.boundary_conditions, \
                                                 halo_x, halo_y \
        )

        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(self, ignore_ghostcells=self.ignore_ghostcells, \
                                    staggered_grid=True, offset_x=self.offset_x, offset_y=self.offset_y)
Пример #13
0
    def __init__(self, \
                 gpu_ctx, \
                 eta0, hu0, hv0, Hi, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, f, r, \
                 t=0.0, \
                 theta=1.3, rk_order=2, \
                 coriolis_beta=0.0, \
                 y_zero_reference_cell = 0, \
                 max_wind_direction_perturbation = 0, \
                 wind_stress=WindStress.WindStress(), \
                 boundary_conditions=Common.BoundaryConditions(), \
                 small_scale_perturbation=False, \
                 small_scale_perturbation_amplitude=None, \
                 h0AsWaterElevation=False, \
                 reportGeostrophicEquilibrium=False, \
                 write_netcdf=False, \
                 ignore_ghostcells=False, \
                 offset_x=0, offset_y=0, \
                 block_width=32, block_height=4):
        """
        Initialization routine
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        Hi: Depth from equilibrium defined on cell corners, (nx+5)*(ny+5) corners
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        t: Start simulation at time t
        theta: MINMOD theta used the reconstructions of the derivatives in the numerical scheme
        rk_order: Order of Runge Kutta method {1,2*,3}
        coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
        y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell .
        max_wind_direction_perturbation: Large-scale model error emulation by per-time-step perturbation of wind direction by +/- max_wind_direction_perturbation (degrees)
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        h0AsWaterElevation: True if h0 is described by the surface elevation, and false if h0 is described by water depth
        reportGeostrophicEquilibrium: Calculate the Geostrophic Equilibrium variables for each superstep
        write_netcdf: Write the results after each superstep to a netCDF file
        """
               
        

        ## After changing from (h, B) to (eta, H), several of the simulator settings used are wrong. This check will help detect that.
        if ( np.sum(eta0 - Hi[:-1, :-1] > 0) > nx):
            assert(False), "It seems you are using water depth/elevation h and bottom topography B, while you should use water level eta and equillibrium depth H."
        
        assert( rk_order < 4 or rk_order > 0 ), "Only 1st, 2nd and 3rd order Runge Kutta supported"

        if (rk_order == 3):
            assert(r == 0.0), "3rd order Runge Kutta supported only without friction"
        
        # Sort out internally represented ghost_cells in the presence of given
        # boundary conditions
        ghost_cells_x = 2
        ghost_cells_y = 2
        y_zero_reference_cell = 2 + y_zero_reference_cell
        
        # Boundary conditions
        self.boundary_conditions = boundary_conditions
        if (boundary_conditions.isSponge()):
            nx = nx + boundary_conditions.spongeCells[1] + boundary_conditions.spongeCells[3] - 2*ghost_cells_x
            ny = ny + boundary_conditions.spongeCells[0] + boundary_conditions.spongeCells[2] - 2*ghost_cells_y
            y_zero_reference_cell = boundary_conditions.spongeCells[2] + y_zero_reference_cell
        
        A = None
        self.max_wind_direction_perturbation = max_wind_direction_perturbation
        super(CDKLM16, self).__init__(gpu_ctx, \
                                      nx, ny, \
                                      ghost_cells_x, \
                                      ghost_cells_y, \
                                      dx, dy, dt, \
                                      g, f, r, A, \
                                      t, \
                                      theta, rk_order, \
                                      coriolis_beta, \
                                      y_zero_reference_cell, \
                                      wind_stress, \
                                      write_netcdf, \
                                      ignore_ghostcells, \
                                      offset_x, offset_y, \
                                      block_width, block_height)
        
        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([-2,-2,2,2])
        self._set_interior_domain_from_sponge_cells()
        
        #Get kernels
        self.kernel = gpu_ctx.get_kernel("CDKLM16_kernel.cu", defines={'block_width': block_width, 'block_height': block_height})
        
        # Get CUDA functions and define data types for prepared_{async_}call()
        self.swe_2D = self.kernel.get_function("swe_2D")
        self.swe_2D.prepare("iifffffffffiiPiPiPiPiPiPiPiPifiiiiiPiPiPi")
        self.update_wind_stress(self.kernel, self.swe_2D)
        
        #Create data by uploading to device
        self.gpu_data = Common.SWEDataArakawaA(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, eta0, hu0, hv0)

        ## Allocating memory for geostrophical equilibrium variables
        self.reportGeostrophicEquilibrium = np.int32(reportGeostrophicEquilibrium)
        dummy_zero_array = np.zeros((ny+2*ghost_cells_y, nx+2*ghost_cells_x), dtype=np.float32, order='C') 
        self.geoEq_uxpvy = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, dummy_zero_array)
        self.geoEq_Kx = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, dummy_zero_array)
        self.geoEq_Ly = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, dummy_zero_array)

        #Bathymetry
        self.bathymetry = Common.Bathymetry(gpu_ctx, self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, Hi, boundary_conditions)
        self.h0AsWaterElevation = h0AsWaterElevation
        if self.h0AsWaterElevation:
            self.bathymetry.waterElevationToDepth(self.gpu_data.h0)
        
        self.constant_equilibrium_depth = np.max(Hi)
        
        self.bc_kernel = Common.BoundaryConditionsArakawaA(gpu_ctx, \
                                                           self.nx, \
                                                           self.ny, \
                                                           ghost_cells_x, \
                                                           ghost_cells_y, \
                                                           self.boundary_conditions, \
        )

        # Small scale perturbation:
        self.small_scale_perturbation = small_scale_perturbation
        self.small_scale_model_error = None
        if small_scale_perturbation:
            if small_scale_perturbation_amplitude is None:
                self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(self)
            else:
                self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(self, soar_q0=small_scale_perturbation_amplitude)
        
        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(self, ignore_ghostcells=self.ignore_ghostcells, \
                                    offset_x=self.offset_x, offset_y=self.offset_y)
Пример #14
0
    def __init__(self, \
                 gpu_ctx, \
                 H, eta0, hu0, hv0, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, f, r, \
                 t=0.0, \
                 coriolis_beta=0.0, \
                 y_zero_reference_cell = 1, \
                 wind_stress=WindStress.WindStress(), \
                 boundary_conditions=Common.BoundaryConditions(), \
                 write_netcdf=False, \
                 comm=None, \
                 ignore_ghostcells=False, \
                 offset_x=0, offset_y=0, \
                 block_width=16, block_height=16):
        """
        Initialization routine
        H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+3) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        coriolis_beta: Coriolis linear factor -> f = f + beta*y
        y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell .
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        write_netcdf: Write the results after each superstep to a netCDF file
        comm: MPI communicator
        """
        
        #### THIS ALLOWS MAKES IT POSSIBLE TO GIVE THE OLD INPUT SHAPES TO NEW GHOST CELL REGIME: Only valid for benchmarking!
        if (eta0.shape == (ny, nx)):
            new_eta = np.zeros((ny+2, nx+2),  dtype=np.float32)
            new_eta[:ny, :nx] = eta0.copy()
            eta0 = new_eta.copy()
        if (H.shape == (ny, nx)):
            new_H = np.ones((ny+2, nx+2),  dtype=np.float32)*np.max(H)
            new_H[:ny,:nx] = H.copy()
            H = new_H.copy()
        if (hu0.shape == (ny, nx+1)):
            new_hu = np.zeros((ny+2, nx+1),  dtype=np.float32)
            new_hu[:ny, :nx+1] = hu0.copy()
            hu0 = new_hu.copy()
        if (hv0.shape == (ny+1, nx)):
            new_hv = np.zeros((ny+3, nx+2),  dtype=np.float32)
            new_hv[:ny+1,:nx] = hv0.copy()
            hv0 = new_hv.copy()
        
            
         
        
        
        #Create data by uploading to device
        ghost_cells_x = 1
        ghost_cells_y = 1
        y_zero_reference_cell = y_zero_reference_cell
        
        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([-1, -1, 1, 1])
        
        self.boundary_conditions = boundary_conditions

        if boundary_conditions.isSponge():
            nx = nx - 2 + boundary_conditions.spongeCells[1] + boundary_conditions.spongeCells[3]
            ny = ny - 2 + boundary_conditions.spongeCells[0] + boundary_conditions.spongeCells[2]
            y_zero_reference_cell = y_zero_reference_cell + boundary_conditions.spongeCells[2]
          
        rk_order = None
        theta = None
        A = None
        super(FBL, self).__init__(gpu_ctx, \
                                  nx, ny, \
                                  ghost_cells_x, \
                                  ghost_cells_y, \
                                  dx, dy, dt, \
                                  g, f, r, A, \
                                  t, \
                                  theta, rk_order, \
                                  coriolis_beta, \
                                  y_zero_reference_cell, \
                                  wind_stress, \
                                  write_netcdf, \
                                  ignore_ghostcells, \
                                  offset_x, offset_y, \
                                  comm, \
                                  block_width, block_height)
        self._set_interior_domain_from_sponge_cells()
        
        
        #Get kernels
        self.step_kernel = gpu_ctx.get_kernel("FBL_step_kernel.cu", 
                defines={'block_width': block_width, 'block_height': block_height},
                compile_args={
                    'no_extern_c': True,
                    'options': ["--use_fast_math"],
                    #'options': ["--generate-line-info"], 
                    #'options': ["--maxrregcount=32"]
                    #'arch': "compute_50", 
                    #'code': "sm_50"
                },
                jit_compile_args={
                    #jit_options=[(cuda.jit_option.MAX_REGISTERS, 39)]
                }
        )
         
        # Get CUDA functions 
        self.fblStepKernel = self.step_kernel.get_function("fblStepKernel")
        
        # Prepare kernel lauches
        self.fblStepKernel.prepare("iiffffffffPiPiPiPiif")
        
        # Set up textures
        self.update_wind_stress(self.step_kernel, self.fblStepKernel)
        
        self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, H)
        self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, eta0, hu0, hv0, fbl=True)
        
        # Domain including ghost cells
        self.nx_halo = np.int32(nx + 2)
        self.ny_halo = np.int32(ny + 2)
       
        self.bc_kernel = FBL_boundary_conditions(self.gpu_ctx, \
                                                 self.nx, \
                                                 self.ny, \
                                                 self.boundary_conditions
        )
        
        # Bit-wise boolean for wall boundary conditions
        self.wall_bc = np.int32(0)
        if (self.boundary_conditions.north == 1):
            self.wall_bc = self.wall_bc | 0x01
        if (self.boundary_conditions.east == 1):
            self.wall_bc = self.wall_bc | 0x02
        if (self.boundary_conditions.south == 1):
            self.wall_bc = self.wall_bc | 0x04
        if (self.boundary_conditions.west == 1):
            self.wall_bc = self.wall_bc | 0x08

        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(self, ignore_ghostcells=self.ignore_ghostcells, \
                                    staggered_grid=True, \
                                    offset_x=self.offset_x, offset_y=self.offset_y)
Пример #15
0
    def __init__(self, gpu_ctx, gpu_stream,
                 nx, ny, dx, dy,
                 boundaryConditions, staggered,
                 soar_q0=None, soar_L=None,
                 block_width=16, block_height=16):
        
        self.random_numbers = None
        self.seed = None
        
        self.gpu_ctx = gpu_ctx
        self.gpu_stream = gpu_stream
        
        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        self.staggered = np.int(0)
        if staggered:
            self.staggered = np.int(1)
            
        # The cutoff parameter is hard-coded.
        # The size of the cutoff determines the computational radius in the
        # SOAR function. Hence, the size of the local memory in the OpenCL 
        # kernels has to be hard-coded.
        self.cutoff = np.int32(config.soar_cutoff) 
        
        self.periodicNorthSouth = np.int32(boundaryConditions.isPeriodicNorthSouth())
        self.periodicEastWest = np.int32(boundaryConditions.isPeriodicEastWest())
        
        # Size of random field and seed
        self.rand_nx = np.int32(nx + 2*(1+self.cutoff))
        self.rand_ny = np.int32(ny + 2*(1+self.cutoff))
        if self.periodicEastWest:
            self.rand_nx = np.int32(nx)
        if self.periodicNorthSouth:
            self.rand_ny = np.int32(ny)
        self.seed_ny = np.int32(self.rand_ny)
        self.seed_nx = np.int32(self.rand_nx/2) 
        ### WHAT IF rand_nx IS ODD??
        # For now, we check this by assert
        assert(self.rand_nx % 2 == 0), "The OceanStateNoise module might not work with odd Nx, so just to be sure you are not allowed to use odd Nx for now :)"
        
        # Constants for the SOAR function:
        self.soar_q0 = np.float32(self.dx/100000)
        if soar_q0 is not None:
            self.soar_q0 = np.float32(soar_q0)
            
        self.soar_L = np.float32(0.75*self.dx)
        if soar_L is not None:
            self.soar_L = np.float32(soar_L)
        
        # Generate seed:
        self.floatMax = 2147483648.0
        self.host_seed = np.random.rand(self.seed_ny, self.seed_nx)*self.floatMax
        self.host_seed = self.host_seed.astype(np.uint64, order='C')

        self.seed = Common.CUDAArray2D(gpu_stream, self.seed_nx, self.seed_ny, 0, 0, self.host_seed, double_precision=True, integers=True)
        
        # Allocate memory for random numbers
        self.random_numbers_host = np.zeros((self.rand_ny, self.rand_nx), dtype=np.float32, order='C')
        self.random_numbers = Common.CUDAArray2D(self.gpu_stream, self.rand_nx, self.rand_ny, 0, 0, self.random_numbers_host)
        
        # Generate kernels
        self.kernels = gpu_ctx.get_kernel("ocean_noise.cu", \
                                          defines={'block_width': block_width, 'block_height': block_height})
 
        # Get CUDA functions and define data types for prepared_{async_}call()
        self.uniformDistributionKernel = self.kernels.get_function("uniformDistribution")
        self.uniformDistributionKernel.prepare("iiiPiPi")
        
        self.normalDistributionKernel = self.kernels.get_function("normalDistribution")
        self.normalDistributionKernel.prepare("iiiPiPi")
        
        self.perturbOceanKernel = self.kernels.get_function("perturbOcean")
        self.perturbOceanKernel.prepare("iiffiiffffffiiPiPiPiPiPi")
        
        #Compute kernel launch parameters
        self.local_size = (block_width, block_height, 1) 
        self.global_size_random_numbers = ( \
                       int(np.ceil(self.seed_nx / float(self.local_size[0]))), \
                       int(np.ceil(self.seed_ny / float(self.local_size[1]))) \
                                  ) 
        self.global_size_noise = ( \
                       int(np.ceil(self.rand_nx / float(self.local_size[0]))), \
                       int(np.ceil(self.rand_ny / float(self.local_size[1]))) \
                                  ) 
Пример #16
0
 def init_double(self):
     self.double_cudaarray = Common.CUDAArray2D(self.gpu_stream, \
                                              self.nx, self.ny, \
                                              self.nx_halo, self.ny_halo, \
                                              self.dbuf1, \
                                              double_precision=True)