Python Common.CUDAArray2D примеры использования

Язык программирования: Python

Пространство имен/Пакет: SWESimulators

Класс/Тип: Common

Метод/Функция: CUDAArray2D

Примеров на hotexamples.com: 16

Python Common.CUDAArray2D - 16 примеров найдено. Это лучшие примеры Python кода для SWESimulators.Common.CUDAArray2D, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

BoundaryConditions(30)

CUDAArray2D(16)

CUDAContext(15)

ProgressPrinter(7)

Bathymetry(5)

BoundaryConditionsArakawaA(5)

SWEDataArakawaA(5)

BoundaryConditionsData(3)

SWEDataArakawaC(3)

OpenCLArray2D(2)

WindStressParams(2)

IPEngine(1)

SingleBoundaryConditionData(1)

Пример #1

Показать файл

Файл: OceanStateNoise_test.py Проект: kaihc/gpu-ocean

 def allocateBuffers(self, HCPU):
     """
     Allocate the device buffers self.eta, self.hu, self.hv and self.H.

     eta, hu and hv are created as (nx, ny) CUDAArray2D objects with the two
     halo arguments set to zero, each receiving the same zero-filled host
     array of shape (ny, nx). H is created with (nx + 1, ny + 1) elements
     and receives HCPU as its host data.

     HCPU: host array handed to the CUDAArray2D that becomes self.H
     """
     zeros = np.zeros((self.ny, self.nx))

     # The three state variables share one zero-filled host source and are
     # allocated in the order eta, hu, hv.
     for field in ('eta', 'hu', 'hv'):
         setattr(self, field,
                 Common.CUDAArray2D(self.gpu_stream, self.nx, self.ny, 0, 0,
                                    zeros))

     self.H = Common.CUDAArray2D(self.gpu_stream, self.nx + 1, self.ny + 1,
                                 0, 0, HCPU)

     # The host-side source array is not needed once the buffers exist
     del zeros

Пример #2

Показать файл

    def _setupGPU(self):
        """
        Set up the observation buffer, the observation kernel and the kernel
        launch sizes used for reading observations.

        All of this is only done when self.observation_type is UnderlyingFlow
        or DirectUnderlyingFlow.
        """
        obs_type = self.observation_type
        observes_flow = (
            obs_type == dautils.ObservationType.UnderlyingFlow
            or obs_type == dautils.ObservationType.DirectUnderlyingFlow)

        if observes_flow:
            # Device buffer initialised from a zero-filled float32 host array
            # of shape (driftersPerOceanModel, 2)
            host_observations = np.zeros((self.driftersPerOceanModel, 2),
                                         dtype=np.float32,
                                         order='C')
            self.observation_buffer = Common.CUDAArray2D(
                self.gpu_stream,
                2, self.driftersPerOceanModel, 0, 0,
                host_observations)

            # Compile the kernel source
            self.observation_kernels = self.gpu_ctx.get_kernel(
                "observationKernels.cu", defines={})

            # Look up the CUDA function and declare its argument types for
            # prepared_{async_}call()
            observe_kernel = self.observation_kernels.get_function(
                "observeUnderlyingFlow")
            self.observeUnderlyingFlowKernel = observe_kernel
            observe_kernel.prepare("iiffiiPiPiPifiPiPi")

            # A single block with one thread per drifter
            self.local_size = (int(self.driftersPerOceanModel), 1, 1)
            self.global_size = (1, 1)

Пример #3

Показать файл

    def test_copy_buffer(self):
        # Checks that copyBuffer makes self.cudaarray hold the data of a
        # second device array that was created from buf3.
        source_array = Common.CUDAArray2D(self.gpu_stream,
                                          self.nx, self.ny,
                                          self.nx_halo, self.ny_halo,
                                          self.buf3)

        # Before the copy, the download must match buf1
        before = self.cudaarray.download(self.gpu_stream)
        self.assertEqual(before.tolist(), self.buf1.tolist())

        # After the copy, the download must match buf3
        self.cudaarray.copyBuffer(self.gpu_stream, source_array)
        after = self.cudaarray.download(self.gpu_stream)
        self.assertEqual(after.tolist(), self.buf3.tolist())

        # Reached only when none of the assertions above failed
        self.tests_failed = False

Пример #4

Показать файл

Файл: GPUDrifterCollection.py Проект: babrodtk/gpu-ocean

    def __init__(self, gpu_ctx, numDrifters,
                 observation_variance=0.01,
                 boundaryConditions=Common.BoundaryConditions(),
                 initialization_cov_drifters=None,
                 domain_size_x=1.0, domain_size_y=1.0,
                 gpu_stream=None,
                 block_width = 64):
        """
        Create a drifter collection whose positions are stored on the GPU.

        gpu_ctx: context object used to compile "driftKernels.cu"
        numDrifters: number of drifters, forwarded to the base class
        observation_variance, boundaryConditions, domain_size_x,
            domain_size_y: forwarded unchanged to the base class constructor
        initialization_cov_drifters: forwarded to
            uniformly_distribute_drifters() at the end of the constructor
        gpu_stream: stream to use; a new cuda.Stream() is created when None
        block_width: number of threads per block along x (block height is 1)
        """
        super(GPUDrifterCollection, self).__init__(
            numDrifters,
            observation_variance=observation_variance,
            boundaryConditions=boundaryConditions,
            domain_size_x=domain_size_x,
            domain_size_y=domain_size_y)

        # CUDA environment
        self.gpu_ctx = gpu_ctx
        self.block_width = block_width
        self.block_height = 1

        # TODO: Where should the stream come from?
        # For sure, the drifter and the ocean simulator should use
        # the same stream...
        self.gpu_stream = gpu_stream
        if self.gpu_stream is None:
            self.gpu_stream = cuda.Stream()

        self.sensitivity = 1.0

        # Host and device storage with one more row than there are drifters
        # NOTE(review): purpose of the extra row is not visible here - confirm
        host_shape = (self.getNumDrifters() + 1, 2)
        self.driftersHost = np.zeros(host_shape).astype(np.float32, order='C')
        self.driftersDevice = Common.CUDAArray2D(
            self.gpu_stream,
            2, self.getNumDrifters()+1, 0, 0,
            self.driftersHost)

        # Compile the drift kernels with the block dimensions as defines
        kernel_defines = {'block_width': self.block_width,
                          'block_height': self.block_height}
        self.drift_kernels = gpu_ctx.get_kernel("driftKernels.cu",
                                                defines=kernel_defines)

        # Get CUDA functions and define data types for prepared_{async_}call()
        self.passiveDrifterKernel = self.drift_kernels.get_function(
            "passiveDrifterKernel")
        self.passiveDrifterKernel.prepare("iifffiiPiPiPifiiiPif")
        self.enforceBoundaryConditionsKernel = self.drift_kernels.get_function(
            "enforceBoundaryConditions")
        self.enforceBoundaryConditionsKernel.prepare("ffiiiPi")

        # Launch configuration: enough blocks of block_width threads to cover
        # getNumDrifters() + 2 threads
        self.local_size = (self.block_width, self.block_height, 1)
        num_blocks = int(np.ceil(
            (self.getNumDrifters() + 2)/float(self.block_width)))
        self.global_size = (num_blocks, 1)

        # Initialize drifters:
        self.uniformly_distribute_drifters(
            initialization_cov_drifters=initialization_cov_drifters)

Пример #5

Показать файл

    def test_cross_precision_copy_buffer(self):
        # Checks that copying from an array created without
        # double_precision=True into self.double_cudaarray raises an
        # AssertionError.
        self.init_double()

        single_source = Common.CUDAArray2D(self.gpu_stream,
                                           self.nx, self.ny,
                                           self.nx_halo, self.ny_halo,
                                           self.buf3)

        # The double precision array must hold dbuf1 before the copy attempt
        before = self.double_cudaarray.download(self.gpu_stream)
        self.assertEqual(before.tolist(), self.dbuf1.tolist())

        # The copy itself is expected to be rejected
        with self.assertRaises(AssertionError):
            self.double_cudaarray.copyBuffer(self.gpu_stream, single_source)

        # Reached only when none of the assertions above failed
        self.tests_failed = False

Пример #6

Показать файл

    def test_double_copy_buffer(self):
        # Checks that copyBuffer between two arrays created with
        # double_precision=True makes self.double_cudaarray hold dbuf3.
        self.init_double()

        double_source = Common.CUDAArray2D(self.gpu_stream,
                                           self.nx, self.ny,
                                           self.nx_halo, self.ny_halo,
                                           self.dbuf3,
                                           double_precision=True)

        # Before the copy, the download must match dbuf1
        before = self.double_cudaarray.download(self.gpu_stream)
        self.assertEqual(before.tolist(), self.dbuf1.tolist())

        # After the copy, the download must match dbuf3
        self.double_cudaarray.copyBuffer(self.gpu_stream, double_source)
        after = self.double_cudaarray.download(self.gpu_stream)
        self.assertEqual(after.tolist(), self.dbuf3.tolist())

        # Reached only when none of the assertions above failed
        self.tests_failed = False

Пример #7

Показать файл

    def setUp(self):
        """
        Create the CUDA context, the host test data, a stream and the
        device array (self.cudaarray, built from buf1) shared by the tests.
        """
        # Create the CUDA context used by the tests
        self.gpu_ctx = Common.CUDAContext()

        # Grid of nx x ny cells plus a halo on every side
        self.nx = 3
        self.ny = 5
        self.nx_halo = 1
        self.ny_halo = 2
        self.dataShape = (self.ny + 2 * self.ny_halo,
                          self.nx + 2 * self.nx_halo)

        # Host data to play with:
        #   buf1[j, i] = 100*i + j   and   buf3[j, i] = 1000*j - i
        # stored as float32, each with a float64 twin (dbuf1, dbuf3) holding
        # the same values.
        rows, cols = np.indices(self.dataShape)
        self.buf1 = (cols * 100 + rows).astype(np.float32, order='C')
        self.dbuf1 = self.buf1.astype(np.float64, order='C')
        self.buf3 = (rows * 1000 - cols).astype(np.float32, order='C')
        self.dbuf3 = self.buf3.astype(np.float64, order='C')

        self.explicit_free = False

        self.device_name = self.gpu_ctx.cuda_device.name()
        self.gpu_stream = cuda.Stream()

        # Each test sets this to False once all of its assertions passed
        self.tests_failed = True

        self.cudaarray = Common.CUDAArray2D(self.gpu_stream,
                                            self.nx, self.ny,
                                            self.nx_halo, self.ny_halo,
                                            self.buf1)

        # Not created here; the double precision tests call init_double()
        self.double_cudaarray = None

Пример #8

Показать файл

    def __init__(self,
                 gpu_ctx,
                 gpu_stream,
                 nx,
                 ny,
                 dx,
                 dy,
                 boundaryConditions,
                 staggered,
                 soar_q0=None,
                 soar_L=None,
                 interpolation_factor=1,
                 use_lcg=False,
                 angle=np.array([[0]], dtype=np.float32),
                 coriolis_f=np.array([[0]], dtype=np.float32),
                 block_width=16,
                 block_height=16):
        """
        Initiates a class that generates small scale geostrophically balanced perturbations of
        the ocean state.
        gpu_ctx: context object whose get_kernel() is used to compile the CUDA kernels
        gpu_stream: stream passed to every device buffer allocated here
        (nx, ny): number of internal grid cells in the domain
        (dx, dy): size of each grid cell
        boundaryConditions: object queried through isPeriodicNorthSouth() and isPeriodicEastWest()
        staggered: truth value stored as the integer flag self.staggered (1 if truthy, else 0)
        soar_q0: amplitude parameter for the perturbation, default: dx*1e-5
        soar_L: length scale of the perturbation covariance, default: 0.75*dx*interpolation_factor
        interpolation_factor: indicates that the perturbation of eta should be generated on a coarse mesh,
            and then interpolated down to the computational mesh. The coarse mesh will then have
            (nx/interpolation_factor, ny/interpolation_factor) grid cells.
            Must be a positive odd integer that divides both nx and ny.
        use_lcg: LCG is a linear algorithm for generating a serie of pseudo-random numbers.
            If True, a seed buffer is uploaded and the "normalDistribution" kernel is prepared;
            otherwise a XORWOWRandomNumberGenerator is created.
        angle: Angle of rotation from North to y-axis as a texture (cuda.Array) or numpy array
        coriolis_f: Coriolis field as a texture (cuda.Array) or numpy array
        (block_width, block_height): The size of each GPU block

        Raises AssertionError if interpolation_factor is not a positive odd integer,
        or if nx or ny is not divisible by it.
        """

        self.use_lcg = use_lcg

        # Set numpy random state
        self.random_state = np.random.RandomState()

        # Make sure that all variables initialized within ifs are defined
        self.random_numbers = None
        self.rng = None
        self.seed = None
        self.host_seed = None

        self.gpu_ctx = gpu_ctx
        self.gpu_stream = gpu_stream

        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        # np.int was only an alias for the builtin int and has been removed
        # from NumPy, so the builtin is used directly.
        self.staggered = int(1) if staggered else int(0)

        # The cutoff parameter is hard-coded.
        # The size of the cutoff determines the computational radius in the
        # SOAR function. Hence, the size of the local memory in the GPU
        # kernels has to be hard-coded.
        self.cutoff = np.int32(config.soar_cutoff)

        # Check that the interpolation factor plays well with the grid size.
        # These stay as asserts so that callers expecting AssertionError
        # keep working.
        assert (interpolation_factor > 0 and interpolation_factor % 2
                == 1), 'interpolation_factor must be a positive odd integer'

        assert (nx % interpolation_factor == 0
                ), 'nx must be divisible by the interpolation factor'
        assert (ny % interpolation_factor == 0
                ), 'ny must be divisible by the interpolation factor'
        self.interpolation_factor = np.int32(interpolation_factor)

        # The size of the coarse grid
        self.coarse_nx = np.int32(nx / self.interpolation_factor)
        self.coarse_ny = np.int32(ny / self.interpolation_factor)
        self.coarse_dx = np.float32(dx * self.interpolation_factor)
        self.coarse_dy = np.float32(dy * self.interpolation_factor)

        self.periodicNorthSouth = np.int32(
            boundaryConditions.isPeriodicNorthSouth())
        self.periodicEastWest = np.int32(
            boundaryConditions.isPeriodicEastWest())

        # Size of random field and seed
        # The SOAR function is a stencil which requires cutoff number of grid cells,
        # and the interpolation operator requires further 2 ghost cell values in each direction.
        # The random field must therefore be created with 2 + cutoff number of ghost cells.
        # No ghost cells are used in a direction that is periodic.
        self.rand_ghost_cells_x = np.int32(2 + self.cutoff)
        self.rand_ghost_cells_y = np.int32(2 + self.cutoff)
        if self.periodicEastWest:
            self.rand_ghost_cells_x = np.int32(0)
        if self.periodicNorthSouth:
            self.rand_ghost_cells_y = np.int32(0)
        self.rand_nx = np.int32(self.coarse_nx + 2 * self.rand_ghost_cells_x)
        self.rand_ny = np.int32(self.coarse_ny + 2 * self.rand_ghost_cells_y)

        # Since normal distributed numbers are generated in pairs, we need to store half the number of
        # of seed values compared to the number of random numbers.
        self.seed_ny = np.int32(self.rand_ny)
        self.seed_nx = np.int32(np.ceil(self.rand_nx / 2))

        # Set up the random number source
        self.floatMax = 2147483648.0  # 2**31, scales the uniform samples below
        if self.use_lcg:
            # Generate the seed on the host and upload it as 64-bit integers
            self.host_seed = self.random_state.rand(
                self.seed_ny, self.seed_nx) * self.floatMax
            self.host_seed = self.host_seed.astype(np.uint64, order='C')
            self.seed = Common.CUDAArray2D(gpu_stream,
                                           self.seed_nx,
                                           self.seed_ny,
                                           0,
                                           0,
                                           self.host_seed,
                                           double_precision=True,
                                           integers=True)
        else:
            self.rng = XORWOWRandomNumberGenerator()

        # Constants for the SOAR function:
        self.soar_q0 = np.float32(self.dx / 100000)
        if soar_q0 is not None:
            self.soar_q0 = np.float32(soar_q0)

        self.soar_L = np.float32(0.75 * self.coarse_dx)
        if soar_L is not None:
            self.soar_L = np.float32(soar_L)

        # Allocate memory for random numbers (xi)
        self.random_numbers_host = np.zeros((self.rand_ny, self.rand_nx),
                                            dtype=np.float32,
                                            order='C')
        self.random_numbers = Common.CUDAArray2D(self.gpu_stream, self.rand_nx,
                                                 self.rand_ny, 0, 0,
                                                 self.random_numbers_host)

        # Allocate a second buffer for random numbers (nu), uploaded from its
        # own host array rather than from the xi host array.
        self.perpendicular_random_numbers_host = np.zeros(
            (self.rand_ny, self.rand_nx), dtype=np.float32, order='C')
        self.perpendicular_random_numbers = Common.CUDAArray2D(
            self.gpu_stream, self.rand_nx, self.rand_ny, 0, 0,
            self.perpendicular_random_numbers_host)

        # Allocate memory for coarse buffer if needed
        # Two ghost cells in each direction needed for bicubic interpolation
        self.coarse_buffer_host = np.zeros(
            (self.coarse_ny + 4, self.coarse_nx + 4),
            dtype=np.float32,
            order='C')
        self.coarse_buffer = Common.CUDAArray2D(self.gpu_stream,
                                                self.coarse_nx, self.coarse_ny,
                                                2, 2, self.coarse_buffer_host)

        # Allocate extra memory needed for reduction kernels.
        # Currently: A single GPU buffer with 3x1 elements: [xi^T * xi, nu^T * nu, xi^T * nu]
        reduction_buffer_host = np.zeros((1, 3), dtype=np.float32)
        self.reduction_buffer = Common.CUDAArray2D(self.gpu_stream, 3, 1, 0, 0,
                                                   reduction_buffer_host)

        # Generate kernels
        self.kernels = gpu_ctx.get_kernel("ocean_noise.cu",
                                          defines={'block_width': block_width, 'block_height': block_height},
                                          compile_args={
                                              'options': ["--use_fast_math",
                                                          "--maxrregcount=32"]
                                          })

        self.reduction_kernels = self.gpu_ctx.get_kernel("reductions.cu",
                                                         defines={})

        # Get CUDA functions and define data types for prepared_{async_}call()
        self.squareSumKernel = self.reduction_kernels.get_function("squareSum")
        self.squareSumKernel.prepare("iiPP")

        self.squareSumDoubleKernel = self.reduction_kernels.get_function(
            "squareSumDouble")
        self.squareSumDoubleKernel.prepare("iiPPP")

        self.makePerpendicularKernel = self.kernels.get_function(
            "makePerpendicular")
        self.makePerpendicularKernel.prepare("iiPiPiP")

        self.uniformDistributionKernel = self.kernels.get_function(
            "uniformDistribution")
        self.uniformDistributionKernel.prepare("iiiPiPi")

        # The normal distribution kernel is only looked up for the LCG path
        self.normalDistributionKernel = None
        if self.use_lcg:
            self.normalDistributionKernel = self.kernels.get_function(
                "normalDistribution")
            self.normalDistributionKernel.prepare("iiiPiPi")

        self.soarKernel = self.kernels.get_function("SOAR")
        self.soarKernel.prepare("iifffffiiPiPii")

        self.geostrophicBalanceKernel = self.kernels.get_function(
            "geostrophicBalance")
        self.geostrophicBalanceKernel.prepare("iiffiiffffPiPiPiPiPif")

        self.bicubicInterpolationKernel = self.kernels.get_function(
            "bicubicInterpolation")
        self.bicubicInterpolationKernel.prepare(
            "iiiiffiiiiffiiffffPiPiPiPiPif")

        #Compute kernel launch parameters
        self.local_size = (block_width, block_height, 1)

        self.local_size_reductions = (128, 1, 1)
        self.global_size_reductions = (1, 1)

        def _grid_size(cells_x, cells_y):
            # Number of blocks of local_size needed to cover cells_x x cells_y threads
            return (int(np.ceil(cells_x / float(self.local_size[0]))),
                    int(np.ceil(cells_y / float(self.local_size[1]))))

        # Launch one thread for each seed, which in turns generates two iid N(0,1)
        self.global_size_random_numbers = _grid_size(self.seed_nx,
                                                     self.seed_ny)

        # Launch on thread for each random number (in order to create perpendicular random numbers)
        self.global_size_perpendicular = _grid_size(self.rand_nx,
                                                    self.rand_ny)

        # Launch one thread per SOAR-correlated result - need to write to two ghost
        # cells in order to do bicubic interpolation based on the result
        self.global_size_SOAR = _grid_size(self.coarse_nx + 4,
                                           self.coarse_ny + 4)

        # One thread per resulting perturbed grid cell
        self.global_size_geo_balance = _grid_size(self.nx, self.ny)

        def _bind_texture(texture_name, field):
            # Fetch the named texture reference, attach the field to it and
            # apply the texture parameters shared by both fields.
            texref = self.kernels.get_texref(texture_name)
            if isinstance(field, cuda.Array):
                # field is already a texture, so we just set the reference
                texref.set_array(field)
            else:
                #Upload data to GPU and bind to texture reference
                texref.set_array(
                    cuda.np_to_array(np.ascontiguousarray(field,
                                                          dtype=np.float32),
                                     order="C"))

            # Set texture parameters
            texref.set_filter_mode(
                cuda.filter_mode.LINEAR)  #bilinear interpolation
            texref.set_address_mode(
                0, cuda.address_mode.CLAMP)  #no indexing outside domain
            texref.set_address_mode(1, cuda.address_mode.CLAMP)
            texref.set_flags(
                cuda.TRSF_NORMALIZED_COORDINATES)  #Use [0, 1] indexing
            return texref

        # Texture for coriolis field
        # FIXME! Allow different versions of coriolis, similar to CDKLM
        self.coriolis_texref = _bind_texture("coriolis_f_tex", coriolis_f)

        # Texture for angle towards north
        self.angle_texref = _bind_texture("angle_tex", angle)

Пример #9

Показать файл

    def __init__(self, \
                 gpu_ctx, \
                 eta0, hu0, hv0, H, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, f, r, \
                 angle=np.array([[0]], dtype=np.float32), \
                 t=0.0, \
                 theta=1.3, rk_order=2, \
                 coriolis_beta=0.0, \
                 max_wind_direction_perturbation = 0, \
                 wind_stress=WindStress.WindStress(), \
                 boundary_conditions=Common.BoundaryConditions(), \
                 boundary_conditions_data=Common.BoundaryConditionsData(), \
                 small_scale_perturbation=False, \
                 small_scale_perturbation_amplitude=None, \
                 small_scale_perturbation_interpolation_factor = 1, \
                 model_time_step=None,
                 reportGeostrophicEquilibrium=False, \
                 use_lcg=False, \
                 write_netcdf=False, \
                 comm=None, \
                 netcdf_filename=None, \
                 ignore_ghostcells=False, \
                 courant_number=0.8, \
                 offset_x=0, offset_y=0, \
                 flux_slope_eps = 1.0e-1, \
                 desingularization_eps = 1.0e-1, \
                 depth_cutoff = 1.0e-5, \
                 block_width=32, block_height=8, num_threads_dt=256,
                 block_width_model_error=16, block_height_model_error=16):
        """
        Initialization routine
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        H: Depth from equilibrium defined on cell corners, (nx+5)*(ny+5) corners
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        angle: Angle of rotation from North to y-axis
        t: Start simulation at time t
        theta: MINMOD theta used the reconstructions of the derivatives in the numerical scheme
        rk_order: Order of Runge Kutta method {1,2*,3}
        coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
        max_wind_direction_perturbation: Large-scale model error emulation by per-time-step perturbation of wind direction by +/- max_wind_direction_perturbation (degrees)
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        small_scale_perturbation: Boolean value for applying a stochastic model error
        small_scale_perturbation_amplitude: Amplitude (q0 coefficient) for model error
        small_scale_perturbation_interpolation_factor: Width factor for correlation in model error
        model_time_step: The size of a data assimilation model step (default same as dt)
        reportGeostrophicEquilibrium: Calculate the Geostrophic Equilibrium variables for each superstep
        use_lcg: Use LCG as the random number generator. Default is False, which means using curand.
        write_netcdf: Write the results after each superstep to a netCDF file
        comm: MPI communicator
        desingularization_eps: Used for desingularizing hu/h
        flux_slope_eps: Used for setting zero flux for symmetric Riemann fan
        depth_cutoff: Used for defining dry cells
        netcdf_filename: Use this filename. (If not defined, a filename will be generated by SimWriter.)
        """
               
        self.logger = logging.getLogger(__name__)

        assert( rk_order < 4 or rk_order > 0 ), "Only 1st, 2nd and 3rd order Runge Kutta supported"

        if (rk_order == 3):
            assert(r == 0.0), "3rd order Runge Kutta supported only without friction"
        
        # Sort out internally represented ghost_cells in the presence of given
        # boundary conditions
        ghost_cells_x = 2
        ghost_cells_y = 2
        
        #Coriolis at "first" cell
        x_zero_reference_cell = ghost_cells_x
        y_zero_reference_cell = ghost_cells_y # In order to pass it to the super constructor
        
        # Boundary conditions
        self.boundary_conditions = boundary_conditions
        if (boundary_conditions.isSponge()):
            nx = nx + boundary_conditions.spongeCells[1] + boundary_conditions.spongeCells[3] - 2*ghost_cells_x
            ny = ny + boundary_conditions.spongeCells[0] + boundary_conditions.spongeCells[2] - 2*ghost_cells_y
            
            x_zero_reference_cell += boundary_conditions.spongeCells[3]
            y_zero_reference_cell += boundary_conditions.spongeCells[2]

        #Compensate f for reference cell (first cell in internal of domain)
        north = np.array([np.sin(angle[0,0]), np.cos(angle[0,0])])
        f = f - coriolis_beta * (x_zero_reference_cell*dx*north[0] + y_zero_reference_cell*dy*north[1])
        
        x_zero_reference_cell = 0
        y_zero_reference_cell = 0
        
        A = None
        self.max_wind_direction_perturbation = max_wind_direction_perturbation
        super(CDKLM16, self).__init__(gpu_ctx, \
                                      nx, ny, \
                                      ghost_cells_x, \
                                      ghost_cells_y, \
                                      dx, dy, dt, \
                                      g, f, r, A, \
                                      t, \
                                      theta, rk_order, \
                                      coriolis_beta, \
                                      y_zero_reference_cell, \
                                      wind_stress, \
                                      write_netcdf, \
                                      ignore_ghostcells, \
                                      offset_x, offset_y, \
                                      comm, \
                                      block_width, block_height)
        
        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([-2,-2,2,2])
        self._set_interior_domain_from_sponge_cells()
        
        defines={'block_width': block_width, 'block_height': block_height,
                   'KPSIMULATOR_DESING_EPS': str(desingularization_eps)+'f',
                   'KPSIMULATOR_FLUX_SLOPE_EPS': str(flux_slope_eps)+'f',
                   'KPSIMULATOR_DEPTH_CUTOFF': str(depth_cutoff)+'f'}
        
        #Get kernels
        self.kernel = gpu_ctx.get_kernel("CDKLM16_kernel.cu", 
                defines=defines, 
                compile_args={                          # default, fast_math, optimal
                    'options' : ["--ftz=true",          # false,   true,      true
                                 "--prec-div=false",    # true,    false,     false,
                                 "--prec-sqrt=false",   # true,    false,     false
                                 "--fmad=false"]        # true,    true,      false
                    
                    #'options': ["--use_fast_math"]
                    #'options': ["--generate-line-info"], 
                    #nvcc_options=["--maxrregcount=39"],
                    #'arch': "compute_50", 
                    #'code': "sm_50"
                },
                jit_compile_args={
                    #jit_options=[(cuda.jit_option.MAX_REGISTERS, 39)]
                }
                )
        
        # Get CUDA functions and define data types for prepared_{async_}call()
        self.cdklm_swe_2D = self.kernel.get_function("cdklm_swe_2D")
        self.cdklm_swe_2D.prepare("iiffffffffiiPiPiPiPiPiPiPiPiffi")
        self.update_wind_stress(self.kernel, self.cdklm_swe_2D)
        
        # CUDA functions for finding max time step size:
        self.num_threads_dt = num_threads_dt
        self.num_blocks_dt  = np.int32(self.global_size[0]*self.global_size[1])
        self.update_dt_kernels = gpu_ctx.get_kernel("max_dt.cu",
                defines={'block_width': block_width, 
                         'block_height': block_height,
                         'NUM_THREADS': self.num_threads_dt})
        self.per_block_max_dt_kernel = self.update_dt_kernels.get_function("per_block_max_dt")
        self.per_block_max_dt_kernel.prepare("iifffPiPiPiPifPi")
        self.max_dt_reduction_kernel = self.update_dt_kernels.get_function("max_dt_reduction")
        self.max_dt_reduction_kernel.prepare("iPP")
        
            
        # Bathymetry
        self.bathymetry = Common.Bathymetry(gpu_ctx, self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, H, boundary_conditions)
                
        # Adjust eta for possible dry states
        Hm = self.downloadBathymetry()[1]
        eta0 = np.maximum(eta0, -Hm)
        
        # Create data by uploading to device
        self.gpu_data = Common.SWEDataArakawaA(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, eta0, hu0, hv0)

        # Allocate memory for calculating maximum timestep
        host_dt = np.zeros((self.global_size[1], self.global_size[0]), dtype=np.float32)
        self.device_dt = Common.CUDAArray2D(self.gpu_stream, self.global_size[0], self.global_size[1],
                                            0, 0, host_dt)
        host_max_dt_buffer = np.zeros((1,1), dtype=np.float32)
        self.max_dt_buffer = Common.CUDAArray2D(self.gpu_stream, 1, 1, 0, 0, host_max_dt_buffer)
        self.courant_number = courant_number
        
        ## Allocating memory for geostrophical equilibrium variables
        self.reportGeostrophicEquilibrium = np.int32(reportGeostrophicEquilibrium)
        self.geoEq_uxpvy = None
        self.geoEq_Kx = None
        self.geoEq_Ly = None
        if self.reportGeostrophicEquilibrium:
            dummy_zero_array = np.zeros((ny+2*ghost_cells_y, nx+2*ghost_cells_x), dtype=np.float32, order='C') 
            self.geoEq_uxpvy = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, dummy_zero_array)
            self.geoEq_Kx = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, dummy_zero_array)
            self.geoEq_Ly = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, dummy_zero_array)

        self.constant_equilibrium_depth = np.max(H)
        
        self.bc_kernel = Common.BoundaryConditionsArakawaA(gpu_ctx, \
                                                           self.nx, \
                                                           self.ny, \
                                                           ghost_cells_x, \
                                                           ghost_cells_y, \
                                                           self.boundary_conditions, \
                                                           boundary_conditions_data, \
        )

        # Small scale perturbation:
        self.small_scale_perturbation = small_scale_perturbation
        self.small_scale_model_error = None
        self.small_scale_perturbation_interpolation_factor = small_scale_perturbation_interpolation_factor
        if small_scale_perturbation:
            if small_scale_perturbation_amplitude is None:
                self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(self,
                                                                                       interpolation_factor=small_scale_perturbation_interpolation_factor,
                                                                                       use_lcg=use_lcg,
                                                                                       block_width=block_width_model_error, 
                                                                                       block_height=block_height_model_error)
            else:
                self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(self, 
                                                                                       soar_q0=small_scale_perturbation_amplitude,
                                                                                       interpolation_factor=small_scale_perturbation_interpolation_factor,
                                                                                       use_lcg=use_lcg,
                                                                                       block_width=block_width_model_error, 
                                                                                       block_height=block_height_model_error)
        
        
        # Data assimilation model step size
        self.model_time_step = model_time_step
        if model_time_step is None:
            self.model_time_step = self.dt
        self.total_time_steps = 0
        
        
        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(self, filename=netcdf_filename, ignore_ghostcells=self.ignore_ghostcells, \
                                    offset_x=self.offset_x, offset_y=self.offset_y)
                                    
                                    
        #Upload data to GPU and bind to texture reference
        self.angle_texref = self.kernel.get_texref("angle_tex")
        self.angle_texref.set_array(cuda.np_to_array(np.ascontiguousarray(angle, dtype=np.float32), order="C"))
                    
        # Set texture parameters
        self.angle_texref.set_filter_mode(cuda.filter_mode.LINEAR) #bilinear interpolation
        self.angle_texref.set_address_mode(0, cuda.address_mode.CLAMP) #no indexing outside domain
        self.angle_texref.set_address_mode(1, cuda.address_mode.CLAMP)
        self.angle_texref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES) #Use [0, 1] indexing

Пример #10

Показать файл

    def __init__(self, \
                 gpu_ctx, \
                 eta0, hu0, hv0, H, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, f, r, \
                 subsample_f=10, \
                 angle=np.array([[0]], dtype=np.float32), \
                 subsample_angle=10, \
                 latitude=None, \
                 t=0.0, \
                 theta=1.3, rk_order=2, \
                 coriolis_beta=0.0, \
                 max_wind_direction_perturbation = 0, \
                 wind_stress=WindStress.WindStress(), \
                 boundary_conditions=Common.BoundaryConditions(), \
                 boundary_conditions_data=Common.BoundaryConditionsData(), \
                 small_scale_perturbation=False, \
                 small_scale_perturbation_amplitude=None, \
                 small_scale_perturbation_interpolation_factor = 1, \
                 model_time_step=None,
                 reportGeostrophicEquilibrium=False, \
                 use_lcg=False, \
                 write_netcdf=False, \
                 comm=None, \
                 local_particle_id=0, \
                 super_dir_name=None, \
                 netcdf_filename=None, \
                 ignore_ghostcells=False, \
                 courant_number=0.8, \
                 offset_x=0, offset_y=0, \
                 flux_slope_eps = 1.0e-1, \
                 desingularization_eps = 1.0e-1, \
                 depth_cutoff = 1.0e-5, \
                 block_width=12, block_height=32, num_threads_dt=256,
                 block_width_model_error=16, block_height_model_error=16):
        """
        Initialization routine

        Compiles the CDKLM16 and max-dt CUDA kernels, uploads bathymetry and
        the initial ocean state to the device, binds the angle and Coriolis
        textures, and optionally sets up the model error and netCDF writer.

        gpu_ctx: GPU context used to compile/fetch the CUDA kernels
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
            NOTE(review): this scheme uses two ghost cells in each direction, so
            the three shapes above look inherited from another scheme - confirm.
        H: Depth from equilibrium defined on cell corners, (nx+5)*(ny+5) corners
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s). If dt <= 0, updateDt() is called at the end of the constructor
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        subsample_f: Subsample the coriolis f when creating texture by factor
        angle: Angle of rotation from North to y-axis as a texture (cuda.Array) or numpy array (in radians)
        subsample_angle: Subsample the angles given as input when creating texture by factor
        latitude: Specify latitude. This will override any f and beta plane already set (in radians)
        t: Start simulation at time t
        theta: MINMOD theta used the reconstructions of the derivatives in the numerical scheme
        rk_order: Order of Runge Kutta method {1,2*,3}
        coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
        max_wind_direction_perturbation: Large-scale model error emulation by per-time-step perturbation of wind direction by +/- max_wind_direction_perturbation (degrees)
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        boundary_conditions_data: Boundary condition data, forwarded to Common.BoundaryConditionsArakawaA
        small_scale_perturbation: Boolean value for applying a stochastic model error
        small_scale_perturbation_amplitude: Amplitude (q0 coefficient) for model error
        small_scale_perturbation_interpolation_factor: Width factor for correlation in model error
        model_time_step: The size of a data assimilation model step (default same as dt)
        reportGeostrophicEquilibrium: Calculate the Geostrophic Equilibrium variables for each superstep
        use_lcg: Use LCG as the random number generator. Default is False, which means using curand.
        write_netcdf: Write the results after each superstep to a netCDF file
        comm: MPI communicator
        local_particle_id: Local (for each MPI process) particle id
        super_dir_name: Directory to write netcdf files to
        netcdf_filename: Use this filename. (If not defined, a filename will be generated by SimWriter.)
        ignore_ghostcells: Forwarded to the parent constructor and the netCDF writer
        courant_number: Stored as self.courant_number (presumably used when computing dt - confirm)
        offset_x, offset_y: Forwarded to the parent constructor and the netCDF writer
        flux_slope_eps: Used for setting zero flux for symmetric Riemann fan
        desingularization_eps: Used for desingularizing hu/h
        depth_cutoff: Used for defining dry cells
        block_width, block_height: Passed as compile-time defines to the CUDA kernels and to the parent constructor
        num_threads_dt: Passed as the NUM_THREADS define to the max_dt.cu kernels
        block_width_model_error, block_height_model_error: Block size forwarded to OceanStateNoise

        Raises AssertionError for an unsupported rk_order (or rk_order 3 with
        friction) and RuntimeError for inconsistent Coriolis/angle input.
        """

        self.logger = logging.getLogger(__name__)

        # Both bounds must hold. (The previous `<4 or >0` test was true for
        # every value and therefore never rejected anything.)
        assert (0 < rk_order < 4
                ), "Only 1st, 2nd and 3rd order Runge Kutta supported"

        if (rk_order == 3):
            assert (r == 0.0
                    ), "3rd order Runge Kutta supported only without friction"

        # Sort out internally represented ghost_cells in the presence of given
        # boundary conditions
        ghost_cells_x = 2
        ghost_cells_y = 2

        #Coriolis at "first" cell
        x_zero_reference_cell = ghost_cells_x
        y_zero_reference_cell = ghost_cells_y  # In order to pass it to the super constructor

        # Boundary conditions
        self.boundary_conditions = boundary_conditions

        #Compensate f for reference cell (first cell in internal of domain)
        # NOTE(review): angle[0, 0] is indexed here unconditionally, also when
        # angle is given as a cuda.Array - confirm that this is supported.
        north = np.array([np.sin(angle[0, 0]), np.cos(angle[0, 0])])
        f = f - coriolis_beta * (x_zero_reference_cell * dx * north[0] +
                                 y_zero_reference_cell * dy * north[1])

        # f has been shifted above, so the reference cell handed to the
        # parent constructor is reset to zero.
        x_zero_reference_cell = 0
        y_zero_reference_cell = 0

        A = None
        self.max_wind_direction_perturbation = max_wind_direction_perturbation
        super(CDKLM16, self).__init__(gpu_ctx, \
                                      nx, ny, \
                                      ghost_cells_x, \
                                      ghost_cells_y, \
                                      dx, dy, dt, \
                                      g, f, r, A, \
                                      t, \
                                      theta, rk_order, \
                                      coriolis_beta, \
                                      y_zero_reference_cell, \
                                      wind_stress, \
                                      write_netcdf, \
                                      ignore_ghostcells, \
                                      offset_x, offset_y, \
                                      comm, \
                                      block_width, block_height,
                                      local_particle_id=local_particle_id)

        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([-2, -2, 2, 2])

        # Compile-time constants for the CUDA kernel; floats are rendered as
        # C float literals (trailing "f").
        defines = {
            'block_width': block_width,
            'block_height': block_height,
            'KPSIMULATOR_DESING_EPS': "{:.12f}f".format(desingularization_eps),
            'KPSIMULATOR_FLUX_SLOPE_EPS': "{:.12f}f".format(flux_slope_eps),
            'KPSIMULATOR_DEPTH_CUTOFF': "{:.12f}f".format(depth_cutoff),
            'THETA': "{:.12f}f".format(self.theta),
            'RK_ORDER': int(self.rk_order),
            'NX': int(self.nx),
            'NY': int(self.ny),
            'DX': "{:.12f}f".format(self.dx),
            'DY': "{:.12f}f".format(self.dy),
            'GRAV': "{:.12f}f".format(self.g),
            'FRIC': "{:.12f}f".format(self.r)
        }

        #Get kernels
        self.kernel = gpu_ctx.get_kernel(
            "CDKLM16_kernel.cu",
            defines=defines,
            compile_args={  # default, fast_math, optimal
                'options': [
                    "--ftz=true",  # false,   true,      true
                    "--prec-div=false",  # true,    false,     false,
                    "--prec-sqrt=false",  # true,    false,     false
                    "--fmad=false"
                ]  # true,    true,      false

                #'options': ["--use_fast_math"]
                #'options': ["--generate-line-info"],
                #nvcc_options=["--maxrregcount=39"],
                #'arch': "compute_50",
                #'code': "sm_50"
            },
            jit_compile_args={
                #jit_options=[(cuda.jit_option.MAX_REGISTERS, 39)]
            })

        # Get CUDA functions and define data types for prepared_{async_}call()
        self.cdklm_swe_2D = self.kernel.get_function("cdklm_swe_2D")
        self.cdklm_swe_2D.prepare("fiPiPiPiPiPiPiPiPiffi")
        self.update_wind_stress(self.kernel, self.cdklm_swe_2D)

        # CUDA functions for finding max time step size:
        self.num_threads_dt = num_threads_dt
        self.num_blocks_dt = np.int32(self.global_size[0] *
                                      self.global_size[1])
        self.update_dt_kernels = gpu_ctx.get_kernel("max_dt.cu",
                                                    defines={
                                                        'block_width':
                                                        block_width,
                                                        'block_height':
                                                        block_height,
                                                        'NUM_THREADS':
                                                        self.num_threads_dt
                                                    })
        self.per_block_max_dt_kernel = self.update_dt_kernels.get_function(
            "per_block_max_dt")
        self.per_block_max_dt_kernel.prepare("iifffPiPiPiPifPi")
        self.max_dt_reduction_kernel = self.update_dt_kernels.get_function(
            "max_dt_reduction")
        self.max_dt_reduction_kernel.prepare("iPP")

        # Bathymetry
        self.bathymetry = Common.Bathymetry(gpu_ctx, self.gpu_stream, nx, ny,
                                            ghost_cells_x, ghost_cells_y, H,
                                            boundary_conditions)

        # Adjust eta for possible dry states
        Hm = self.downloadBathymetry()[1]
        eta0 = np.maximum(eta0, -Hm)

        # Create data by uploading to device
        self.gpu_data = Common.SWEDataArakawaA(self.gpu_stream, nx, ny,
                                               ghost_cells_x, ghost_cells_y,
                                               eta0, hu0, hv0)

        # Allocate memory for calculating maximum timestep
        host_dt = np.zeros((self.global_size[1], self.global_size[0]),
                           dtype=np.float32)
        self.device_dt = Common.CUDAArray2D(self.gpu_stream,
                                            self.global_size[0],
                                            self.global_size[1], 0, 0, host_dt)
        host_max_dt_buffer = np.zeros((1, 1), dtype=np.float32)
        self.max_dt_buffer = Common.CUDAArray2D(self.gpu_stream, 1, 1, 0, 0,
                                                host_max_dt_buffer)
        self.courant_number = courant_number

        ## Allocating memory for geostrophical equilibrium variables
        self.reportGeostrophicEquilibrium = np.int32(
            reportGeostrophicEquilibrium)
        self.geoEq_uxpvy = None
        self.geoEq_Kx = None
        self.geoEq_Ly = None
        if self.reportGeostrophicEquilibrium:
            # One zero-filled host array is used to initialise all three
            # device buffers.
            dummy_zero_array = np.zeros(
                (ny + 2 * ghost_cells_y, nx + 2 * ghost_cells_x),
                dtype=np.float32,
                order='C')
            self.geoEq_uxpvy = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                                  ghost_cells_x, ghost_cells_y,
                                                  dummy_zero_array)
            self.geoEq_Kx = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                               ghost_cells_x, ghost_cells_y,
                                               dummy_zero_array)
            self.geoEq_Ly = Common.CUDAArray2D(self.gpu_stream, nx, ny,
                                               ghost_cells_x, ghost_cells_y,
                                               dummy_zero_array)

        self.constant_equilibrium_depth = np.max(H)

        self.bc_kernel = Common.BoundaryConditionsArakawaA(gpu_ctx, \
                                                           self.nx, \
                                                           self.ny, \
                                                           ghost_cells_x, \
                                                           ghost_cells_y, \
                                                           self.boundary_conditions, \
                                                           boundary_conditions_data, \
        )

        def subsample_texture(data, factor):
            """
            Resample a 2D array onto a grid that is `factor` times coarser
            (at least 2x2), using linear interpolation between cell centers
            on the unit square.
            """
            # NOTE(review): interp2d is deprecated in recent SciPy releases -
            # consider migrating to a supported interpolator.
            ny, nx = data.shape
            dx, dy = 1 / nx, 1 / ny
            I = interp2d(np.linspace(0.5 * dx, 1 - 0.5 * dx, nx),
                         np.linspace(0.5 * dy, 1 - 0.5 * dy, ny),
                         data,
                         kind='linear')

            new_nx, new_ny = max(2, nx // factor), max(2, ny // factor)
            new_dx, new_dy = 1 / new_nx, 1 / new_ny
            x_new = np.linspace(0.5 * new_dx, 1 - 0.5 * new_dx, new_nx)
            y_new = np.linspace(0.5 * new_dy, 1 - 0.5 * new_dy, new_ny)
            return I(x_new, y_new)

        # Texture for angle
        self.angle_texref = self.kernel.get_texref("angle_tex")
        if isinstance(angle, cuda.Array):
            # angle is already a texture, so we just set the texture reference
            self.angle_texref.set_array(angle)
        else:
            #Upload data to GPU and bind to texture reference
            if (subsample_angle and angle.size >= eta0.size):
                self.logger.info("Subsampling angle texture by factor " +
                                 str(subsample_angle))
                self.logger.warning(
                    "This will give inaccurate angle along the border!")
                angle = subsample_texture(angle, subsample_angle)

            self.angle_texref.set_array(
                cuda.np_to_array(np.ascontiguousarray(angle, dtype=np.float32),
                                 order="C"))

        # Set texture parameters
        self.angle_texref.set_filter_mode(
            cuda.filter_mode.LINEAR)  #bilinear interpolation
        self.angle_texref.set_address_mode(
            0, cuda.address_mode.CLAMP)  #no indexing outside domain
        self.angle_texref.set_address_mode(1, cuda.address_mode.CLAMP)
        self.angle_texref.set_flags(
            cuda.TRSF_NORMALIZED_COORDINATES)  #Use [0, 1] indexing

        # Texture for coriolis f
        self.coriolis_texref = self.kernel.get_texref("coriolis_f_tex")

        # Create the CPU coriolis
        if (latitude is not None):
            if (self.f != 0.0):
                raise RuntimeError(
                    "Cannot specify both latitude and f. Make your mind up.")
            coriolis_f, _ = OceanographicUtilities.calcCoriolisParams(latitude)
            coriolis_f = coriolis_f.astype(np.float32)
        else:
            if (self.coriolis_beta != 0.0):
                if (angle.size != 1):
                    raise RuntimeError(
                        "non-constant angle cannot be combined with beta plane model (makes no sense)"
                    )
                #Generate coordinates for all cells, including ghost cells from center to center
                # [-3/2dx, nx+3/2dx] for ghost_cells_x == 2
                x = np.linspace((-self.ghost_cells_x + 0.5) * self.dx,
                                (self.nx + self.ghost_cells_x - 0.5) * self.dx,
                                self.nx + 2 * self.ghost_cells_x)
                # The number of y samples follows the y ghost cells (this
                # previously used ghost_cells_x).
                y = np.linspace((-self.ghost_cells_y + 0.5) * self.dy,
                                (self.ny + self.ghost_cells_y - 0.5) * self.dy,
                                self.ny + 2 * self.ghost_cells_y)
                # This branch builds f from the beta plane, not from latitude.
                self.logger.info(
                    "Using beta plane to create Coriolis f texture ({:d}x{:d} cells)"
                    .format(x.size, y.size))
                x, y = np.meshgrid(x, y)
                n = x * np.sin(angle[0, 0]) + y * np.cos(
                    angle[0, 0])  #North vector
                coriolis_f = self.f + self.coriolis_beta * n
            else:
                if (self.f.size == 1):
                    coriolis_f = np.array([[self.f]], dtype=np.float32)
                elif (self.f.shape == eta0.shape):
                    coriolis_f = np.array(self.f, dtype=np.float32)
                else:
                    raise RuntimeError(
                        "The shape of f should match up with eta or be scalar."
                    )

        if (subsample_f and coriolis_f.size >= eta0.size):
            self.logger.info("Subsampling coriolis texture by factor " +
                             str(subsample_f))
            self.logger.warning(
                "This will give inaccurate coriolis along the border!")
            coriolis_f = subsample_texture(coriolis_f, subsample_f)

        #Upload data to GPU and bind to texture reference
        self.coriolis_texref.set_array(
            cuda.np_to_array(np.ascontiguousarray(coriolis_f,
                                                  dtype=np.float32),
                             order="C"))

        # Set texture parameters
        self.coriolis_texref.set_filter_mode(
            cuda.filter_mode.LINEAR)  #bilinear interpolation
        self.coriolis_texref.set_address_mode(
            0, cuda.address_mode.CLAMP)  #no indexing outside domain
        self.coriolis_texref.set_address_mode(1, cuda.address_mode.CLAMP)
        self.coriolis_texref.set_flags(
            cuda.TRSF_NORMALIZED_COORDINATES)  #Use [0, 1] indexing

        # Small scale perturbation:
        self.small_scale_perturbation = small_scale_perturbation
        self.small_scale_model_error = None
        self.small_scale_perturbation_interpolation_factor = small_scale_perturbation_interpolation_factor
        if small_scale_perturbation:
            self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(
                self,
                soar_q0=small_scale_perturbation_amplitude,
                interpolation_factor=
                small_scale_perturbation_interpolation_factor,
                use_lcg=use_lcg,
                block_width=block_width_model_error,
                block_height=block_height_model_error)

        # Data assimilation model step size
        self.model_time_step = model_time_step
        self.total_time_steps = 0
        if model_time_step is None:
            self.model_time_step = self.dt

        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(self, super_dir_name=super_dir_name, filename=netcdf_filename, \
                                            ignore_ghostcells=self.ignore_ghostcells, offset_x=self.offset_x, offset_y=self.offset_y)

        # Update timestep if dt is given as zero
        if self.dt <= 0:
            self.updateDt()

Пример #11

Показать файл

Файл: FBL.py Проект: kaihc/gpu-ocean

    def __init__(self,
                 gpu_ctx,
                 H, eta0, hu0, hv0,
                 nx, ny,
                 dx, dy, dt,
                 g, f, r,
                 t=0.0,
                 coriolis_beta=0.0,
                 y_zero_reference_cell = 0,
                 wind_stress=WindStress.WindStress(),
                 boundary_conditions=Common.BoundaryConditions(),
                 write_netcdf=False,
                 ignore_ghostcells=False,
                 offset_x=0, offset_y=0,
                 block_width=16, block_height=16):
        """
        Set up the FBL simulator: compile the U, V and eta kernels, upload
        the depth and initial state to the device and create the boundary
        condition handler.

        gpu_ctx: GPU context used to fetch the CUDA kernels
        H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        t: Forwarded to the parent constructor (presumably the start time - confirm)
        coriolis_beta: Coriolis linear factor -> f = f + beta*y
        y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell .
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        write_netcdf: Write the results after each superstep to a netCDF file
        ignore_ghostcells: Forwarded to the parent constructor and the netCDF writer
        offset_x, offset_y: Forwarded to the parent constructor and the netCDF writer
        block_width, block_height: Passed as defines to the kernels and to the parent constructor
        """

        # This scheme is set up without symmetric ghost cells; extra cells
        # are tracked per side in asym_ghost_cells instead.
        halo_cells_x = 0
        halo_cells_y = 0
        self.asym_ghost_cells = [0, 0, 0, 0]  # [N, E, S, W]

        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0], \
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([None, None, 0, 0])

        self.boundary_conditions = boundary_conditions

        # A periodic boundary (value 2) in a direction adds one asymmetric
        # ghost cell on the north, respectively east, side.
        if ((self.boundary_conditions.north == 2)
                or (self.boundary_conditions.south == 2)):
            self.asym_ghost_cells[0] = 1
            self.interior_domain_indices[0] = -1
        if ((self.boundary_conditions.east == 2)
                or (self.boundary_conditions.west == 2)):
            self.asym_ghost_cells[1] = 1
            self.interior_domain_indices[1] = -1

        # With sponge boundaries the sponge cells are added to the domain
        # size, and the reference cell is shifted by the southern sponge.
        if boundary_conditions.isSponge():
            sponge = boundary_conditions.spongeCells
            nx = nx + sponge[1] + sponge[3]
            ny = ny + sponge[0] + sponge[2]
            y_zero_reference_cell = y_zero_reference_cell + sponge[2]

        super(FBL, self).__init__(
            gpu_ctx,
            nx, ny,
            halo_cells_x,
            halo_cells_y,
            dx, dy, dt,
            g, f, r,
            None,  # A is passed as None
            t,
            None,  # theta is passed as None
            None,  # rk_order is passed as None
            coriolis_beta,
            y_zero_reference_cell,
            wind_stress,
            write_netcdf,
            ignore_ghostcells,
            offset_x, offset_y,
            block_width, block_height)
        self._set_interior_domain_from_sponge_cells()

        def block_defines():
            # Fresh dict per kernel, mirroring the separate literals that
            # each get_kernel call receives.
            return {'block_width': block_width, 'block_height': block_height}

        # Get kernels
        self.u_kernel = gpu_ctx.get_kernel("FBL_U_kernel.cu",
                                           defines=block_defines())
        self.v_kernel = gpu_ctx.get_kernel("FBL_V_kernel.cu",
                                           defines=block_defines())
        self.eta_kernel = gpu_ctx.get_kernel("FBL_eta_kernel.cu",
                                             defines=block_defines())

        # Get CUDA functions
        self.computeUKernel = self.u_kernel.get_function("computeUKernel")
        self.computeVKernel = self.v_kernel.get_function("computeVKernel")
        self.computeEtaKernel = self.eta_kernel.get_function("computeEtaKernel")

        # Declare the argument types for prepared kernel launches
        self.computeUKernel.prepare("iiffffffffPiPiPiPif")
        self.computeVKernel.prepare("iiffffffffPiPiPiPif")
        self.computeEtaKernel.prepare("iiffffffffPiPiPiPi")

        # Set up textures
        self.update_wind_stress(self.u_kernel, self.computeUKernel)
        self.update_wind_stress(self.v_kernel, self.computeVKernel)

        # Upload depth and initial state to the device
        self.H = Common.CUDAArray2D(self.gpu_stream,
                                    nx, ny,
                                    halo_cells_x, halo_cells_y,
                                    H,
                                    self.asym_ghost_cells)
        self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream,
                                               nx, ny,
                                               halo_cells_x, halo_cells_y,
                                               eta0, hu0, hv0,
                                               self.asym_ghost_cells)

        # Overwrite halo with asymetric ghost cells
        asym_north, asym_east, asym_south, asym_west = self.asym_ghost_cells
        self.nx_halo = np.int32(nx + asym_east + asym_west)
        self.ny_halo = np.int32(ny + asym_north + asym_south)

        self.bc_kernel = FBL_periodic_boundary(self.gpu_ctx,
                                               self.nx,
                                               self.ny,
                                               self.boundary_conditions,
                                               self.asym_ghost_cells)

        self.totalNumIterations = 0
        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(
                self,
                ignore_ghostcells=self.ignore_ghostcells,
                staggered_grid=True,
                offset_x=self.offset_x,
                offset_y=self.offset_y)

Пример #12

Показать файл

    def __init__(self,
                 gpu_ctx,
                 H, eta0, hu0, hv0,
                 nx, ny,
                 dx, dy, dt,
                 g, f, r, A=0.0,
                 t=0.0,
                 coriolis_beta=0.0,
                 y_zero_reference_cell=0,
                 wind_stress=WindStress.WindStress(),
                 boundary_conditions=Common.BoundaryConditions(),
                 write_netcdf=False,
                 ignore_ghostcells=False,
                 offset_x=0, offset_y=0,
                 block_width=16, block_height=16):
        """
        Construct the CTCS simulator: fetch and prepare the three CUDA
        kernels, upload the initial state to the device and create the
        boundary-condition handler.

        H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational acceleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^-1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        A: Eddy viscosity coefficient (O(dx))
        t: Start simulation at time t
        coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
        y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell.
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        write_netcdf: Write the results after each superstep to a netCDF file
        ignore_ghostcells, offset_x, offset_y: Forwarded to the parent constructor
        block_width, block_height: CUDA block dimensions; given to the kernels
            as compile-time defines and forwarded to the parent constructor
        """

        # This scheme keeps a single layer of ghost cells around the domain,
        # and the device buffers use a halo of the same width.
        halo_x = halo_y = 1
        ghost_cells_x = ghost_cells_y = 1

        # The reference cell is shifted by the ghost cell layer.
        y_ref_cell = y_zero_reference_cell + 1

        self.boundary_conditions = boundary_conditions
        if boundary_conditions.isSponge():
            # Sponge cells replace the ghost cells and enlarge the domain.
            # spongeCells is indexed as (north, east, south, west).
            sponge = boundary_conditions.spongeCells
            nx = nx + sponge[1] + sponge[3] - 2 * ghost_cells_x
            ny = ny + sponge[0] + sponge[2] - 2 * ghost_cells_y
            y_ref_cell = y_ref_cell + sponge[2]

        # self.<parameters> are set in the parent constructor.
        # theta and rk_order are not used by this scheme, hence None.
        super(CTCS, self).__init__(gpu_ctx,
                                   nx, ny,
                                   ghost_cells_x, ghost_cells_y,
                                   dx, dy, dt,
                                   g, f, r, A,
                                   t,
                                   None, None,
                                   coriolis_beta,
                                   y_ref_cell,
                                   wind_stress,
                                   write_netcdf,
                                   ignore_ghostcells,
                                   offset_x, offset_y,
                                   block_width, block_height)

        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0],
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([-1, -1, 1, 1])
        self._set_interior_domain_from_sponge_cells()

        def _block_defines():
            # A fresh dict per kernel, exactly as if written out three times.
            return {'block_width': block_width, 'block_height': block_height}

        # Get kernels
        self.u_kernel = gpu_ctx.get_kernel("CTCS_U_kernel.cu", defines=_block_defines())
        self.v_kernel = gpu_ctx.get_kernel("CTCS_V_kernel.cu", defines=_block_defines())
        self.eta_kernel = gpu_ctx.get_kernel("CTCS_eta_kernel.cu", defines=_block_defines())

        # Get CUDA functions
        self.computeUKernel = self.u_kernel.get_function("computeUKernel")
        self.computeVKernel = self.v_kernel.get_function("computeVKernel")
        self.computeEtaKernel = self.eta_kernel.get_function("computeEtaKernel")

        # Prepare kernel launches; the U and V kernels share one signature.
        momentum_signature = "iiiifffffffffPiPiPiPiPif"
        self.computeUKernel.prepare(momentum_signature)
        self.computeVKernel.prepare(momentum_signature)
        self.computeEtaKernel.prepare("iiffffffffPiPiPi")

        # Set up textures
        self.update_wind_stress(self.u_kernel, self.computeUKernel)
        self.update_wind_stress(self.v_kernel, self.computeVKernel)

        # Create data by uploading to device
        self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny, halo_x, halo_y, H)
        self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny,
                                               halo_x, halo_y,
                                               eta0, hu0, hv0)

        # Global size needs to be larger than the default from parent.__init__
        # because the halo cells are covered by the launch as well.
        blocks_x = int(np.ceil((self.nx + 2*halo_x) / float(self.local_size[0])))
        blocks_y = int(np.ceil((self.ny + 2*halo_y) / float(self.local_size[1])))
        self.global_size = (blocks_x, blocks_y)

        self.bc_kernel = CTCS_boundary_condition(gpu_ctx,
                                                 self.nx,
                                                 self.ny,
                                                 self.boundary_conditions,
                                                 halo_x, halo_y)

        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(
                self,
                ignore_ghostcells=self.ignore_ghostcells,
                staggered_grid=True,
                offset_x=self.offset_x,
                offset_y=self.offset_y)

Пример #13

Показать файл

    def __init__(self,
                 gpu_ctx,
                 eta0, hu0, hv0, Hi,
                 nx, ny,
                 dx, dy, dt,
                 g, f, r,
                 t=0.0,
                 theta=1.3, rk_order=2,
                 coriolis_beta=0.0,
                 y_zero_reference_cell = 0,
                 max_wind_direction_perturbation = 0,
                 wind_stress=WindStress.WindStress(),
                 boundary_conditions=Common.BoundaryConditions(),
                 small_scale_perturbation=False,
                 small_scale_perturbation_amplitude=None,
                 h0AsWaterElevation=False,
                 reportGeostrophicEquilibrium=False,
                 write_netcdf=False,
                 ignore_ghostcells=False,
                 offset_x=0, offset_y=0,
                 block_width=32, block_height=4):
        """
        Initialization routine
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        Hi: Depth from equilibrium defined on cell corners, (nx+5)*(ny+5) corners
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational acceleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^-1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        t: Start simulation at time t
        theta: MINMOD theta used in the reconstructions of the derivatives in the numerical scheme
        rk_order: Order of Runge Kutta method {1,2*,3}
        coriolis_beta: Coriolis linear factor -> f = f + beta*(y-y_0)
        y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell.
        max_wind_direction_perturbation: Large-scale model error emulation by per-time-step perturbation of wind direction by +/- max_wind_direction_perturbation (degrees)
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        small_scale_perturbation: If true, an OceanStateNoise object is created from this simulator and stored in self.small_scale_model_error
        small_scale_perturbation_amplitude: Optional value passed as soar_q0 to the OceanStateNoise object
        h0AsWaterElevation: True if h0 is described by the surface elevation, and false if h0 is described by water depth
        reportGeostrophicEquilibrium: Calculate the Geostrophic Equilibrium variables for each superstep
        write_netcdf: Write the results after each superstep to a netCDF file

        Raises AssertionError if the input looks like (h, B) instead of
        (eta, H), if rk_order is not 1, 2 or 3, or if rk_order is 3 and r is
        non-zero.

        NOTE(review): the shapes listed above for eta0/hu0/hv0 look inherited
        from a scheme with one ghost cell; this constructor uses two ghost
        cells in each direction and compares eta0 against Hi[:-1, :-1].
        Confirm the expected shapes against the callers.
        """

        ## After changing from (h, B) to (eta, H), several of the simulator settings used are wrong. This check will help detect that.
        if np.sum(eta0 - Hi[:-1, :-1] > 0) > nx:
            assert False, "It seems you are using water depth/elevation h and bottom topography B, while you should use water level eta and equillibrium depth H."

        # Both bounds must hold. The previous "<4 or >0" test was true for
        # every number and therefore never rejected an unsupported order.
        assert 0 < rk_order < 4, "Only 1st, 2nd and 3rd order Runge Kutta supported"

        if rk_order == 3:
            assert r == 0.0, "3rd order Runge Kutta supported only without friction"

        # Sort out internally represented ghost_cells in the presence of given
        # boundary conditions
        ghost_cells_x = 2
        ghost_cells_y = 2
        y_zero_reference_cell = 2 + y_zero_reference_cell

        # Boundary conditions
        self.boundary_conditions = boundary_conditions
        if boundary_conditions.isSponge():
            # Sponge cells replace the ghost cells and enlarge the domain.
            # spongeCells is indexed as (north, east, south, west).
            nx = nx + boundary_conditions.spongeCells[1] + boundary_conditions.spongeCells[3] - 2*ghost_cells_x
            ny = ny + boundary_conditions.spongeCells[0] + boundary_conditions.spongeCells[2] - 2*ghost_cells_y
            y_zero_reference_cell = boundary_conditions.spongeCells[2] + y_zero_reference_cell

        # This constructor takes no eddy viscosity parameter, so None is
        # handed to the parent in its place.
        A = None
        self.max_wind_direction_perturbation = max_wind_direction_perturbation
        super(CDKLM16, self).__init__(gpu_ctx,
                                      nx, ny,
                                      ghost_cells_x,
                                      ghost_cells_y,
                                      dx, dy, dt,
                                      g, f, r, A,
                                      t,
                                      theta, rk_order,
                                      coriolis_beta,
                                      y_zero_reference_cell,
                                      wind_stress,
                                      write_netcdf,
                                      ignore_ghostcells,
                                      offset_x, offset_y,
                                      block_width, block_height)

        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0],
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([-2,-2,2,2])
        self._set_interior_domain_from_sponge_cells()

        # Get kernels
        self.kernel = gpu_ctx.get_kernel("CDKLM16_kernel.cu", defines={'block_width': block_width, 'block_height': block_height})

        # Get CUDA functions and define data types for prepared_{async_}call()
        self.swe_2D = self.kernel.get_function("swe_2D")
        self.swe_2D.prepare("iifffffffffiiPiPiPiPiPiPiPiPifiiiiiPiPiPi")
        self.update_wind_stress(self.kernel, self.swe_2D)

        # Create data by uploading to device
        self.gpu_data = Common.SWEDataArakawaA(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, eta0, hu0, hv0)

        ## Allocating memory for geostrophical equilibrium variables
        self.reportGeostrophicEquilibrium = np.int32(reportGeostrophicEquilibrium)
        # One zero-filled host array is used to initialise all three buffers.
        dummy_zero_array = np.zeros((ny+2*ghost_cells_y, nx+2*ghost_cells_x), dtype=np.float32, order='C')
        self.geoEq_uxpvy = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, dummy_zero_array)
        self.geoEq_Kx = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, dummy_zero_array)
        self.geoEq_Ly = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, dummy_zero_array)

        # Bathymetry
        self.bathymetry = Common.Bathymetry(gpu_ctx, self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, Hi, boundary_conditions)
        self.h0AsWaterElevation = h0AsWaterElevation
        if self.h0AsWaterElevation:
            self.bathymetry.waterElevationToDepth(self.gpu_data.h0)

        self.constant_equilibrium_depth = np.max(Hi)

        self.bc_kernel = Common.BoundaryConditionsArakawaA(gpu_ctx,
                                                           self.nx,
                                                           self.ny,
                                                           ghost_cells_x,
                                                           ghost_cells_y,
                                                           self.boundary_conditions)

        # Small scale perturbation:
        self.small_scale_perturbation = small_scale_perturbation
        self.small_scale_model_error = None
        if small_scale_perturbation:
            if small_scale_perturbation_amplitude is None:
                self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(self)
            else:
                self.small_scale_model_error = OceanStateNoise.OceanStateNoise.fromsim(self, soar_q0=small_scale_perturbation_amplitude)

        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(self, ignore_ghostcells=self.ignore_ghostcells,
                                    offset_x=self.offset_x, offset_y=self.offset_y)

Пример #14

Показать файл

    def __init__(self,
                 gpu_ctx,
                 H, eta0, hu0, hv0,
                 nx, ny,
                 dx, dy, dt,
                 g, f, r,
                 t=0.0,
                 coriolis_beta=0.0,
                 y_zero_reference_cell=1,
                 wind_stress=WindStress.WindStress(),
                 boundary_conditions=Common.BoundaryConditions(),
                 write_netcdf=False,
                 comm=None,
                 ignore_ghostcells=False,
                 offset_x=0, offset_y=0,
                 block_width=16, block_height=16):
        """
        Construct the FBL simulator: pad legacy-shaped input if needed, fetch
        and prepare the step kernel, upload the initial state to the device
        and create the boundary-condition handler.

        H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+3) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational acceleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^-1), effectively as f = f + beta*y
        r: Bottom friction coefficient (2.4e-3 m/s)
        t: Start simulation at time t
        coriolis_beta: Coriolis linear factor -> f = f + beta*y
        y_zero_reference_cell: The cell representing y_0 in the above, defined as the lower face of the cell.
        wind_stress: Wind stress parameters
        boundary_conditions: Boundary condition object
        write_netcdf: Write the results after each superstep to a netCDF file
        comm: MPI communicator
        ignore_ghostcells, offset_x, offset_y: Forwarded to the parent constructor
        block_width, block_height: CUDA block dimensions; given to the kernel
            as compile-time defines and forwarded to the parent constructor
        """

        # Accept the old input shapes (without ghost cells) under the new
        # ghost cell regime by embedding them in the lower-left corner of
        # arrays of the new size. Only valid for benchmarking!
        if eta0.shape == (ny, nx):
            padded_eta = np.zeros((ny+2, nx+2), dtype=np.float32)
            padded_eta[:ny, :nx] = eta0
            eta0 = padded_eta
        if H.shape == (ny, nx):
            # The padding cells take the maximum depth found in the input.
            padded_H = np.ones((ny+2, nx+2), dtype=np.float32)*np.max(H)
            padded_H[:ny, :nx] = H
            H = padded_H
        if hu0.shape == (ny, nx+1):
            padded_hu = np.zeros((ny+2, nx+1), dtype=np.float32)
            padded_hu[:ny, :nx+1] = hu0
            hu0 = padded_hu
        if hv0.shape == (ny+1, nx):
            padded_hv = np.zeros((ny+3, nx+2), dtype=np.float32)
            padded_hv[:ny+1, :nx] = hv0
            hv0 = padded_hv

        # One layer of ghost cells in each direction. Unlike the ghost cell
        # count, y_zero_reference_cell is used exactly as given.
        ghost_cells_x = ghost_cells_y = 1

        # Index range for interior domain (north, east, south, west)
        # so that interior domain of eta is
        # eta[self.interior_domain_indices[2]:self.interior_domain_indices[0],
        #     self.interior_domain_indices[3]:self.interior_domain_indices[1] ]
        self.interior_domain_indices = np.array([-1, -1, 1, 1])

        self.boundary_conditions = boundary_conditions

        if boundary_conditions.isSponge():
            # Sponge cells replace the ghost cells and enlarge the domain.
            # spongeCells is indexed as (north, east, south, west).
            sponge = boundary_conditions.spongeCells
            nx = nx - 2 + sponge[1] + sponge[3]
            ny = ny - 2 + sponge[0] + sponge[2]
            y_zero_reference_cell = y_zero_reference_cell + sponge[2]

        # A, theta and rk_order are not used by this scheme, hence None.
        super(FBL, self).__init__(gpu_ctx,
                                  nx, ny,
                                  ghost_cells_x, ghost_cells_y,
                                  dx, dy, dt,
                                  g, f, r, None,
                                  t,
                                  None, None,
                                  coriolis_beta,
                                  y_zero_reference_cell,
                                  wind_stress,
                                  write_netcdf,
                                  ignore_ghostcells,
                                  offset_x, offset_y,
                                  comm,
                                  block_width, block_height)
        self._set_interior_domain_from_sponge_cells()

        # Get kernels. Only fast math is enabled; no JIT options are passed.
        self.step_kernel = gpu_ctx.get_kernel(
            "FBL_step_kernel.cu",
            defines={'block_width': block_width, 'block_height': block_height},
            compile_args={
                'no_extern_c': True,
                'options': ["--use_fast_math"],
            },
            jit_compile_args={},
        )

        # Get CUDA functions
        self.fblStepKernel = self.step_kernel.get_function("fblStepKernel")

        # Prepare kernel launches
        self.fblStepKernel.prepare("iiffffffffPiPiPiPiif")

        # Set up textures
        self.update_wind_stress(self.step_kernel, self.fblStepKernel)

        # Create data by uploading to device
        self.H = Common.CUDAArray2D(self.gpu_stream, nx, ny, ghost_cells_x, ghost_cells_y, H)
        self.gpu_data = Common.SWEDataArakawaC(self.gpu_stream, nx, ny,
                                               ghost_cells_x, ghost_cells_y,
                                               eta0, hu0, hv0, fbl=True)

        # Domain including ghost cells
        self.nx_halo = np.int32(nx + 2)
        self.ny_halo = np.int32(ny + 2)

        self.bc_kernel = FBL_boundary_conditions(self.gpu_ctx,
                                                 self.nx,
                                                 self.ny,
                                                 self.boundary_conditions)

        # Bit-wise boolean for wall boundary conditions: one bit per side
        # whose boundary condition value equals 1.
        wall_bits = np.int32(0)
        for side, bit in (("north", 0x01), ("east", 0x02),
                          ("south", 0x04), ("west", 0x08)):
            if getattr(self.boundary_conditions, side) == 1:
                wall_bits = wall_bits | bit
        self.wall_bc = wall_bits

        if self.write_netcdf:
            self.sim_writer = SimWriter.SimNetCDFWriter(
                self,
                ignore_ghostcells=self.ignore_ghostcells,
                staggered_grid=True,
                offset_x=self.offset_x,
                offset_y=self.offset_y)

Пример #15

Показать файл

Файл: OceanStateNoise.py Проект: kaihc/gpu-ocean

    def __init__(self, gpu_ctx, gpu_stream,
                 nx, ny, dx, dy,
                 boundaryConditions, staggered,
                 soar_q0=None, soar_L=None,
                 block_width=16, block_height=16):
        """
        Allocate the device buffers and prepare the CUDA kernels used for
        generating random numbers and perturbing the ocean state.

        gpu_ctx: Context object providing get_kernel()
        gpu_stream: Stream handed to the device arrays created here
        nx, ny: Grid size; stored as 32-bit integers
        dx, dy: Grid cell spacing; stored as 32-bit floats
        boundaryConditions: Object providing isPeriodicNorthSouth() and
            isPeriodicEastWest()
        staggered: Truthy if the grid is staggered; stored as the integer 0 or 1
        soar_q0: SOAR parameter q0; defaults to dx/100000 when None
        soar_L: SOAR parameter L; defaults to 0.75*dx when None
        block_width, block_height: CUDA block dimensions; given to the kernels
            as compile-time defines and used as the local size

        Raises AssertionError if the width of the random field is odd.
        """

        self.random_numbers = None
        self.seed = None

        self.gpu_ctx = gpu_ctx
        self.gpu_stream = gpu_stream

        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        # np.int was only an alias for the builtin int and no longer exists in
        # current NumPy releases, so a plain Python int is stored instead.
        self.staggered = 1 if staggered else 0

        # The cutoff parameter is hard-coded.
        # The size of the cutoff determines the computational radius in the
        # SOAR function. Hence, the size of the local memory in the GPU
        # kernels has to be hard-coded.
        self.cutoff = np.int32(config.soar_cutoff)

        self.periodicNorthSouth = np.int32(boundaryConditions.isPeriodicNorthSouth())
        self.periodicEastWest = np.int32(boundaryConditions.isPeriodicEastWest())

        # Size of random field and seed. In a non-periodic direction the field
        # is extended by (1 + cutoff) cells on each side of the domain.
        self.rand_nx = np.int32(nx + 2*(1+self.cutoff))
        self.rand_ny = np.int32(ny + 2*(1+self.cutoff))
        if self.periodicEastWest:
            self.rand_nx = np.int32(nx)
        if self.periodicNorthSouth:
            self.rand_ny = np.int32(ny)
        # The seed buffer has half as many columns as the random field.
        self.seed_ny = np.int32(self.rand_ny)
        self.seed_nx = np.int32(self.rand_nx/2)
        ### WHAT IF rand_nx IS ODD??
        # For now, we check this by assert
        assert(self.rand_nx % 2 == 0), "The OceanStateNoise module might not work with odd Nx, so just to be sure you are not allowed to use odd Nx for now :)"

        # Constants for the SOAR function:
        self.soar_q0 = np.float32(self.dx/100000)
        if soar_q0 is not None:
            self.soar_q0 = np.float32(soar_q0)

        self.soar_L = np.float32(0.75*self.dx)
        if soar_L is not None:
            self.soar_L = np.float32(soar_L)

        # Generate seed: uniform numbers in [0, 1) scaled by floatMax (2**31)
        # and truncated to unsigned 64-bit integers.
        self.floatMax = 2147483648.0
        self.host_seed = np.random.rand(self.seed_ny, self.seed_nx)*self.floatMax
        self.host_seed = self.host_seed.astype(np.uint64, order='C')

        self.seed = Common.CUDAArray2D(gpu_stream, self.seed_nx, self.seed_ny, 0, 0, self.host_seed, double_precision=True, integers=True)

        # Allocate memory for random numbers
        self.random_numbers_host = np.zeros((self.rand_ny, self.rand_nx), dtype=np.float32, order='C')
        self.random_numbers = Common.CUDAArray2D(self.gpu_stream, self.rand_nx, self.rand_ny, 0, 0, self.random_numbers_host)

        # Generate kernels
        self.kernels = gpu_ctx.get_kernel("ocean_noise.cu",
                                          defines={'block_width': block_width, 'block_height': block_height})

        # Get CUDA functions and define data types for prepared_{async_}call()
        self.uniformDistributionKernel = self.kernels.get_function("uniformDistribution")
        self.uniformDistributionKernel.prepare("iiiPiPi")

        self.normalDistributionKernel = self.kernels.get_function("normalDistribution")
        self.normalDistributionKernel.prepare("iiiPiPi")

        self.perturbOceanKernel = self.kernels.get_function("perturbOcean")
        self.perturbOceanKernel.prepare("iiffiiffffffiiPiPiPiPiPi")

        # Compute kernel launch parameters: enough blocks to cover the seed
        # buffer and the random field, respectively.
        self.local_size = (block_width, block_height, 1)
        self.global_size_random_numbers = (
                       int(np.ceil(self.seed_nx / float(self.local_size[0]))),
                       int(np.ceil(self.seed_ny / float(self.local_size[1])))
                                  )
        self.global_size_noise = (
                       int(np.ceil(self.rand_nx / float(self.local_size[0]))),
                       int(np.ceil(self.rand_ny / float(self.local_size[1])))
                                  )

Пример #16

Показать файл

 def init_double(self):
     """Create self.double_cudaarray as a double-precision CUDAArray2D.

     The array is built on self.gpu_stream from self.nx, self.ny,
     self.nx_halo, self.ny_halo and self.dbuf1.
     """
     self.double_cudaarray = Common.CUDAArray2D(
         self.gpu_stream,
         self.nx, self.ny,
         self.nx_halo, self.ny_halo,
         self.dbuf1,
         double_precision=True)