def get_pbc_kernels(self, runner):
    """Collect the periodic boundary condition kernels for this model.

    The distribution kernels are taken unchanged from the parent class.
    On top of that, one 'ApplyMacroPeriodicBoundaryConditions' kernel is
    created per axis (0, 1, 2) for every scalar field whose abstract
    descriptor has ``need_nn`` set.  The same set of kernels is built
    under both outer keys (0 and 1) of the macro mapping.

    :param runner: object providing ``get_kernel``, ``gpu_field`` and
        ``gpu_indirect_address``
    :returns: ``subdomain_runner.MacroKernels`` built from the macro
        mapping (outer key -> axis -> list of kernels) and the
        distribution kernels returned by the parent class
    """
    distributions = super(LBSingleFluidShanChen, self).get_pbc_kernels(runner)
    macro = defaultdict(lambda: defaultdict(list))

    # With indirect node addressing every kernel takes the address table
    # as an extra leading pointer argument.
    indirect = self.config.node_addressing == 'indirect'
    kernel_sig = 'PPi' if indirect else 'Pi'
    leading_args = [runner.gpu_indirect_address()] if indirect else []

    for grid in (0, 1):
        for axis in range(3):
            for pair in self._scalar_fields:
                if pair.abstract.need_nn:
                    # Entries are only created when there is something to
                    # append, so axes without kernels stay absent.
                    macro[grid][axis].append(
                        runner.get_kernel(
                            'ApplyMacroPeriodicBoundaryConditions',
                            leading_args + [runner.gpu_field(pair.buffer),
                                            np.uint32(axis)],
                            kernel_sig))

    return subdomain_runner.MacroKernels(macro=macro,
                                         distributions=distributions)
def get_pbc_kernels(self, runner):
    """Build periodic boundary condition kernels for both distributions.

    For each axis (0, 1, 2) two distribution kernels are created, one per
    distribution set obtained from ``runner.gpu_dist(0, ...)`` and
    ``runner.gpu_dist(1, ...)``, plus one macroscopic-field kernel for
    every scalar field whose abstract descriptor has ``need_nn`` set.

    Outer key 0 always uses 'ApplyPeriodicBoundaryConditions' on the
    ``(*, 0)`` buffers.  Outer key 1 depends on ``config.access_pattern``:
    with 'AB' it uses the same kernel on the ``(*, 1)`` buffers, otherwise
    it uses 'ApplyPeriodicBoundaryConditionsWithSwap' on the ``(*, 0)``
    buffers.

    :param runner: object providing ``get_kernel``, ``gpu_dist``,
        ``gpu_field`` and ``gpu_indirect_address``
    :returns: ``subdomain_runner.MacroKernels`` holding the macro and
        distribution mappings (outer key -> axis -> list of kernels)
    """
    gpu_dist1a = runner.gpu_dist(0, 0)
    gpu_dist1b = runner.gpu_dist(0, 1)
    gpu_dist2a = runner.gpu_dist(1, 0)
    gpu_dist2b = runner.gpu_dist(1, 1)

    # grid type (primary, secondary) -> axis -> kernels
    dist_kernels = defaultdict(lambda: defaultdict(list))
    macro_kernels = defaultdict(lambda: defaultdict(list))

    # The leading arguments and the signature depend only on the
    # configuration, so they are computed once and shared by every kernel
    # created below.
    if self.config.node_addressing == 'indirect':
        args = [runner.gpu_indirect_address()]
        signature = 'PPi'
    else:
        args = []
        signature = 'Pi'

    def add_macro_kernels(grid, axis):
        # For macroscopic fields there is no distinction between primary
        # and secondary buffers, so both grids get identical kernels.
        for field_pair in self._scalar_fields:
            if not field_pair.abstract.need_nn:
                continue
            macro_kernels[grid][axis].append(
                runner.get_kernel(
                    'ApplyMacroPeriodicBoundaryConditions',
                    args + [runner.gpu_field(field_pair.buffer),
                            np.uint32(axis)],
                    signature))

    for i in range(0, 3):
        dist_kernels[0][i] = [
            runner.get_kernel('ApplyPeriodicBoundaryConditions',
                              args + [gpu_dist1a, np.uint32(i)], signature),
            runner.get_kernel('ApplyPeriodicBoundaryConditions',
                              args + [gpu_dist2a, np.uint32(i)], signature)
        ]
        add_macro_kernels(0, i)

    if self.config.access_pattern == 'AB':
        gpu_dist1 = gpu_dist1b
        gpu_dist2 = gpu_dist2b
        kernel = 'ApplyPeriodicBoundaryConditions'
    else:
        gpu_dist1 = gpu_dist1a
        gpu_dist2 = gpu_dist2a
        kernel = 'ApplyPeriodicBoundaryConditionsWithSwap'

    for i in range(0, 3):
        dist_kernels[1][i] = [
            runner.get_kernel(kernel, args + [gpu_dist1, np.uint32(i)],
                              signature),
            runner.get_kernel(kernel, args + [gpu_dist2, np.uint32(i)],
                              signature)
        ]
        add_macro_kernels(1, i)

    ret = subdomain_runner.MacroKernels(macro=macro_kernels,
                                        distributions=dist_kernels)
    return ret