def test_reduction(ctx_factory, grid_shape, proc_shape, dtype, op,
                   _grid_shape, pass_grid_dims, timing=False):
    """Compare ``ps.Reduction`` against ``Reduction.reduce_array``.

    A temporary ``x = f / 2 + .31`` is reduced with ``op`` on the device and
    the result is checked against ``reducer.reduce_array`` applied to the
    same expression evaluated directly on the array.

    :arg ctx_factory: callable returning an OpenCL context; when falsy, a
        context is obtained via ``ps.choose_device_and_make_context()``.
    :arg grid_shape: grid shape used unless ``_grid_shape`` is truthy.
    :arg proc_shape: process-grid shape passed to ``ps.DomainDecomposition``.
    :arg dtype: dtype of the random input array; also selects the tolerance.
    :arg op: name of the reduction operation under test.
    :arg _grid_shape: optional override for ``grid_shape``.
    :arg pass_grid_dims: if true, ``rank_shape`` and ``grid_size`` are passed
        explicitly to ``ps.Reduction``; otherwise they are omitted.
    :arg timing: if true, time the reduction and print the results on rank 0.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    grid_shape = _grid_shape or grid_shape
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)

    from pymbolic import var
    from pystella import Field

    # the reducer operates on the temporary x rather than on f directly
    tmp_insns = [(var("x"), Field("f") / 2 + .31)]

    reducers = {}
    reducers["avg"] = [(var("x"), op)]

    # np.prod replaces np.product, which was removed in NumPy 2.0
    if pass_grid_dims:
        reducer = ps.Reduction(mpi, reducers, rank_shape=rank_shape,
                               tmp_instructions=tmp_insns,
                               grid_size=np.prod(grid_shape))
    else:
        reducer = ps.Reduction(mpi, reducers, tmp_instructions=tmp_insns)

    f = clr.rand(queue, rank_shape, dtype=dtype)

    import pyopencl.tools as clt
    pool = clt.MemoryPool(clt.ImmediateAllocator(queue))

    result = reducer(queue, f=f, allocator=pool)
    avg = result["avg"]

    avg_test = reducer.reduce_array(f / 2 + .31, op)
    if op == "avg":
        # NOTE(review): reduce_array presumably returns the un-normalized
        # sum for "avg", hence the division by the number of grid points
        avg_test /= np.prod(grid_shape)

    rtol = 5e-14 if dtype == np.float64 else 1e-5
    assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
        f"{op} reduction innaccurate for {grid_shape=}, {proc_shape=}"

    if timing:
        from common import timer
        t = timer(lambda: reducer(queue, f=f, allocator=pool), ntime=1000)
        if mpi.rank == 0:
            print(
                f"reduction took {t:.3f} ms for {grid_shape=}, {proc_shape=}")
            # t is reported in ms, so scale to bytes per second
            bandwidth = f.nbytes / 1024**3 / t * 1000
            print(f"Bandwidth = {bandwidth:.1f} GB/s")
def test_reduction_with_new_shape(ctx_factory, grid_shape, proc_shape, dtype,
                                  op, _grid_shape, timing=False):
    """Check that one ``ps.Reduction`` instance works for two array shapes.

    The reducer is built without explicit shape information, called once on
    an array of the rank-local shape for ``grid_shape``, then called again on
    an array for a grid with every dimension halved. Both results are
    compared with ``reducer.reduce_array``.

    :arg ctx_factory: callable returning an OpenCL context; when falsy, a
        context is obtained via ``ps.choose_device_and_make_context()``.
    :arg grid_shape: grid shape used unless ``_grid_shape`` is truthy.
    :arg proc_shape: process-grid shape passed to ``ps.DomainDecomposition``.
    :arg dtype: dtype of the random input arrays; also selects the tolerance.
    :arg op: name of the reduction operation under test.
    :arg _grid_shape: optional override for ``grid_shape``.
    :arg timing: unused here; kept so the signature matches the other tests.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    grid_shape = _grid_shape or grid_shape
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)

    from pystella import Field

    reducers = {}
    reducers["avg"] = [(Field("f"), op)]

    reducer = ps.Reduction(mpi, reducers)

    # the tolerance depends only on dtype, so compute it once for both checks
    rtol = 5e-14 if dtype == np.float64 else 1e-5

    f = clr.rand(queue, rank_shape, dtype=dtype)
    result = reducer(queue, f=f)
    avg = result["avg"]

    avg_test = reducer.reduce_array(f, op)
    if op == "avg":
        # np.prod replaces np.product, which was removed in NumPy 2.0
        avg_test /= np.prod(grid_shape)

    assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
        f"{op} reduction innaccurate for {grid_shape=}, {proc_shape=}"

    # test call to reducer with new shape
    grid_shape = tuple(Ni // 2 for Ni in grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)
    f = clr.rand(queue, rank_shape, dtype=dtype)
    result = reducer(queue, f=f)
    avg = result["avg"]

    avg_test = reducer.reduce_array(f, op)
    if op == "avg":
        avg_test /= np.prod(grid_shape)

    assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
        f"{op} reduction w/new shape innaccurate for {grid_shape=}, {proc_shape=}"
def test_scalar_energy(ctx_factory, grid_shape, proc_shape, h, dtype,
                       timing=False):
    """Compare a ``ps.Reduction`` over a ``ps.ScalarSector`` with NumPy.

    Two scalar fields with a quadratic-plus-coupling potential are filled
    with random data. The reduction's output is checked key by key against
    host-side reference values:

    * ``"kinetic"``: ``1/2 * sum(dfdt**2) / grid_size`` per field,
    * ``"gradient"``: ``1/2 * sum(-f * lap_f) / grid_size`` per field,
    * ``"potential"``: ``sum(potential(f)) / grid_size``,

    where the sums are taken over the halo-free interior and combined
    across ranks with ``mpi.allreduce``.

    :arg ctx_factory: callable returning an OpenCL context; when falsy, a
        context is obtained via ``ps.choose_device_and_make_context()``.
    :arg grid_shape: global grid shape.
    :arg proc_shape: process-grid shape passed to ``ps.DomainDecomposition``.
    :arg h: halo width; ``f`` and ``dfdt`` are padded by ``h`` on each side.
        The ``[h:-h]`` slices below require ``h > 0``.
    :arg dtype: dtype of the random input arrays; also selects the tolerance.
    :arg timing: if true, time the reduction and print the result on rank 0.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)
    # np.prod replaces np.product, which was removed in NumPy 2.0
    grid_size = np.prod(grid_shape)

    nscalars = 2

    def potential(f):
        phi, chi = f[0], f[1]
        return 1 / 2 * phi**2 + 1 / 2 * chi**2 + 1 / 2 * phi**2 * chi**2

    scalar_sector = ps.ScalarSector(nscalars, potential=potential)
    scalar_energy = ps.Reduction(mpi, scalar_sector, rank_shape=rank_shape,
                                 grid_size=grid_size, halo_shape=h)

    # f and dfdt carry halo padding; lap is allocated without it
    pencil_shape = tuple(ni + 2 * h for ni in rank_shape)
    f = clr.rand(queue, (nscalars, ) + pencil_shape, dtype)
    dfdt = clr.rand(queue, (nscalars, ) + pencil_shape, dtype)
    lap = clr.rand(queue, (nscalars, ) + rank_shape, dtype)

    energy = scalar_energy(queue, f=f, dfdt=dfdt, lap_f=lap, a=np.array(1.))

    kin_test = []
    grad_test = []
    for fld in range(nscalars):
        # strip the halo before summing so only interior points contribute
        df_h = dfdt[fld].get()
        rank_sum = np.sum(df_h[h:-h, h:-h, h:-h]**2)
        kin_test.append(1 / 2 * mpi.allreduce(rank_sum) / grid_size)

        f_h = f[fld].get()
        lap_h = lap[fld].get()
        rank_sum = np.sum(-f_h[h:-h, h:-h, h:-h] * lap_h)
        grad_test.append(1 / 2 * mpi.allreduce(rank_sum) / grid_size)

    energy_test = {}
    energy_test["kinetic"] = np.array(kin_test)
    energy_test["gradient"] = np.array(grad_test)

    phi = f[0].get()[h:-h, h:-h, h:-h]
    chi = f[1].get()[h:-h, h:-h, h:-h]
    pot_rank = np.sum(potential([phi, chi]))
    energy_test["potential"] = np.array(mpi.allreduce(pot_rank) / grid_size)

    max_rtol = 1e-14 if dtype == np.float64 else 1e-5
    avg_rtol = 1e-14 if dtype == np.float64 else 1e-5

    for key, value in energy.items():
        max_err, avg_err = get_errs(value, energy_test[key])
        assert max_err < max_rtol and avg_err < avg_rtol, \
            f"{key} inaccurate for {nscalars=}, {grid_shape=}, {proc_shape=}" \
            f": {max_err=}, {avg_err=}"

    if timing:
        from common import timer
        t = timer(lambda: scalar_energy(
            queue, a=np.array(1.),
            f=f, dfdt=dfdt, lap_f=lap))
        if mpi.rank == 0:
            print(f"scalar energy took {t:.3f} "
                  f"ms for {nscalars=}, {grid_shape=}, {proc_shape=}")
scalar_sector = ps.ScalarSector(nscalars, potential=potential) sectors = [scalar_sector] if gravitational_waves: gw_sector = ps.TensorPerturbationSector([scalar_sector]) sectors += [gw_sector] stepper = Stepper(sectors, halo_shape=halo_shape, rank_shape=rank_shape, dt=dt) # create energy computation function from pystella.sectors import get_rho_and_p reduce_energy = ps.Reduction(decomp, scalar_sector, halo_shape=halo_shape, callback=get_rho_and_p, rank_shape=rank_shape, grid_size=grid_size) def compute_energy(f, dfdt, lap_f, dfdx, a): if gravitational_waves: derivs(queue, fx=f, lap=lap_f, grd=dfdx) else: derivs(queue, fx=f, lap=lap_f) return reduce_energy(queue, f=f, dfdt=dfdt, lap_f=lap_f, a=np.array(a)) # create output function if decomp.rank == 0: