def test_vector_projector(ctx_factory, grid_shape, proc_shape, h, dtype,
                          timing=False):
    """Check the vector operations of ``ps.Projector``.

    The test runs, in order: ``transversify``, ``pol_to_vec``, a second
    ``transversify`` of the ``pol_to_vec`` output, ``vec_to_pol`` (both into
    fresh arrays and into components of the input vector), and
    ``decompose_vector``.  Transversality is measured by transforming to
    position space and comparing the divergence to the summed magnitudes of
    its three terms.

    :arg ctx_factory: callable returning an OpenCL context; when falsy,
        ``ps.choose_device_and_make_context`` is used instead.
    :arg grid_shape: grid shape handed to ``ps.DomainDecomposition`` and
        ``ps.DFT``.
    :arg proc_shape: handed to ``ps.DomainDecomposition``.
    :arg h: halo width.  ``h > 0`` selects ``FirstCenteredDifference``
        eigenvalues with ``ps.FiniteDifferencer``; otherwise the projector
        uses ``k`` itself and derivatives come from ``ps.SpectralCollocator``.
    :arg dtype: dtype of the position-space arrays.
    :arg timing: when true, time the projector calls and print the results.
    """
    ctx = ctx_factory() if ctx_factory else ps.choose_device_and_make_context()
    queue = cl.CommandQueue(ctx)

    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)
    # rank-local shape padded by h on both sides of every axis
    pencil_shape = tuple(n + 2 * h for n in rank_shape)

    box = (10, 8, 11.5)
    dx = tuple(length / n for length, n in zip(box, grid_shape))
    dk = tuple(2 * np.pi / length for length in box)

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)
    cdtype = fft.cdtype

    if h > 0:
        stencil = FirstCenteredDifference(h)
        project = ps.Projector(fft, stencil.get_eigenvalues, dk, dx)
        derivs = ps.FiniteDifferencer(mpi, h, dx)
    else:
        project = ps.Projector(fft, lambda k, dx: k, dk, dx)
        derivs = ps.SpectralCollocator(fft, dk)

    # scratch arrays shared by every divergence check below
    vector_x = cla.empty(queue, (3, *pencil_shape), dtype)
    div = cla.empty(queue, rank_shape, dtype)
    pdx = cla.empty(queue, (3, *rank_shape), dtype)

    deriv_keys = ("pdx", "pdy", "pdz")

    def get_divergence_error(vec_k):
        """Return (max, average) divergence of *vec_k* relative to its terms."""
        for mu in range(3):
            fft.idft(vec_k[mu], vector_x[mu])

        derivs.divergence(queue, vector_x, div)

        # the three individual terms of the divergence set the error scale
        for mu, key in enumerate(deriv_keys):
            derivs(queue, fx=vector_x[mu], **{key: pdx[mu]})

        norm = sum(clm.fabs(pdx[mu]) for mu in range(3))
        abs_div = clm.fabs(div)
        return cla.max(abs_div) / cla.max(norm), cla.sum(abs_div) / cla.sum(norm)

    if dtype == np.float64:
        max_rtol, avg_rtol = 1e-11, 1e-13
    else:
        max_rtol, avg_rtol = 1e-4, 1e-5

    k_shape = fft.shape(True)
    vector = cla.empty(queue, (3, *k_shape), cdtype)
    for mu in range(3):
        vector[mu] = make_data(queue, fft).astype(cdtype)

    # --- transversify (in place) ---
    project.transversify(queue, vector)

    max_err, avg_err = get_divergence_error(vector)
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"transversify failed for {grid_shape=}, {h=}: {max_err=}, {avg_err=}"
    )

    # --- polarizations -> vector ---
    plus = make_data(queue, fft).astype(cdtype)
    minus = make_data(queue, fft).astype(cdtype)
    project.pol_to_vec(queue, plus, minus, vector)

    if isinstance(fft, gDFT):
        assert all(is_hermitian(vector[i]) for i in range(3)), (
            f"pol->vec is non-hermitian for {grid_shape=}, {h=}"
        )

    max_err, avg_err = get_divergence_error(vector)
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"pol_to_vec result not transverse for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    # projecting the pol_to_vec output again should reproduce it
    before_reprojection = vector.get()
    reprojected = cla.zeros_like(vector)
    project.transversify(queue, vector, reprojected)
    after_reprojection = reprojected.get()

    max_err, avg_err = get_errs(before_reprojection, after_reprojection)
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"pol->vector != its own transverse proj. for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    # --- vector -> polarizations, out of place ---
    plus_back = cla.zeros_like(plus)
    minus_back = cla.zeros_like(minus)
    project.vec_to_pol(queue, plus_back, minus_back, vector)

    if isinstance(fft, gDFT):
        assert is_hermitian(plus_back) and is_hermitian(minus_back), (
            f"polarizations aren't hermitian for {grid_shape=}, {h=}"
        )

    max_err, avg_err = get_errs(plus_back.get(), plus.get())
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"pol->vec->pol (plus) is not identity for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    max_err, avg_err = get_errs(minus_back.get(), minus.get())
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"pol->vec->pol (minus) is not identity for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    # --- vector -> polarizations, outputs aliasing the input vector ---
    project.vec_to_pol(queue, vector[0], vector[1], vector)

    max_err, avg_err = get_errs(plus_back.get(), vector[0].get())
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"in-place pol->vec->pol (plus) not identity for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    max_err, avg_err = get_errs(minus_back.get(), vector[1].get())
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"in-place pol->vec->pol (minus) not identity for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    # reset and test longitudinal component
    for mu in range(3):
        vector[mu] = make_data(queue, fft).astype(cdtype)
        fft.idft(vector[mu], vector_x[mu])

    longitudinal = cla.zeros_like(minus)
    project.decompose_vector(queue, vector, plus_back, minus_back, longitudinal)
    long_x = cla.empty(queue, pencil_shape, dtype)
    fft.idft(longitudinal, long_x)

    div_true = cla.empty(queue, rank_shape, dtype)
    derivs.divergence(queue, vector_x, div_true)

    # divergence of the gradient of the longitudinal part
    derivs(queue, fx=long_x, grd=pdx)
    div_long = cla.empty(queue, rank_shape, dtype)
    if h == 0:
        grad_for_div = pdx
    else:
        # copy the gradient into halo-padded storage before differencing it
        pdx_h = cla.empty(queue, (3, *pencil_shape), dtype)
        for mu in range(3):
            mpi.restore_halos(queue, pdx[mu], pdx_h[mu])
        grad_for_div = pdx_h
    derivs.divergence(queue, grad_for_div, div_long)

    max_err, avg_err = get_errs(div_true.get(), div_long.get())
    assert max_err < 1e-6 and avg_err < 1e-11, (
        f"lap(longitudinal) != div vector for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    if timing:
        from common import timer

        ntime = 10
        benchmarks = (
            ("transversify",
             lambda: project.transversify(queue, vector)),
            ("pol_to_vec",
             lambda: project.pol_to_vec(queue, plus, minus, vector)),
            ("vec_to_pol",
             lambda: project.vec_to_pol(queue, plus, minus, vector)),
            ("decompose_vector",
             lambda: project.decompose_vector(queue, vector, plus, minus,
                                              longitudinal)),
        )
        for label, call in benchmarks:
            t = timer(call, ntime=ntime)
            print(f"{label} took {t:.3f} ms for {grid_shape=}")
def test_tensor_projector(ctx_factory, grid_shape, proc_shape, h, dtype,
                          timing=False):
    """Check the tensor operations of ``ps.Projector``.

    The test runs, in order: ``transverse_traceless``, ``pol_to_tensor``, a
    second ``transverse_traceless`` of the ``pol_to_tensor`` output, and
    ``tensor_to_pol`` (both into fresh arrays and into components of the
    input tensor).  The tensor is stored as six components addressed through
    ``tensor_id``.

    :arg ctx_factory: callable returning an OpenCL context; when falsy,
        ``ps.choose_device_and_make_context`` is used instead.
    :arg grid_shape: grid shape handed to ``ps.DomainDecomposition`` and
        ``ps.DFT``.
    :arg proc_shape: handed to ``ps.DomainDecomposition``.
    :arg h: halo width.  ``h > 0`` selects ``FirstCenteredDifference``
        eigenvalues with ``ps.FiniteDifferencer``; otherwise the projector
        uses ``k`` itself and derivatives come from ``ps.SpectralCollocator``.
    :arg dtype: dtype of the position-space arrays.
    :arg timing: when true, time the projector calls and print the results.
    """
    ctx = ctx_factory() if ctx_factory else ps.choose_device_and_make_context()
    queue = cl.CommandQueue(ctx)

    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)

    box = (10, 8, 11.5)
    dx = tuple(length / n for length, n in zip(box, grid_shape))
    dk = tuple(2 * np.pi / length for length in box)

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)
    cdtype = fft.cdtype

    if h > 0:
        stencil = FirstCenteredDifference(h)
        project = ps.Projector(fft, stencil.get_eigenvalues, dk, dx)
        derivs = ps.FiniteDifferencer(mpi, h, dx)
    else:
        project = ps.Projector(fft, lambda k, dx: k, dk, dx)
        derivs = ps.SpectralCollocator(fft, dk)

    # scratch arrays shared by every divergence check below
    padded_shape = tuple(n + 2 * h for n in rank_shape)
    vector_x = cla.empty(queue, (3, *padded_shape), dtype)
    div = cla.empty(queue, rank_shape, dtype)
    pdx = cla.empty(queue, (3, *rank_shape), dtype)

    deriv_keys = ("pdx", "pdy", "pdz")

    def get_divergence_errors(hij):
        """Return arrays of (max, average) relative divergence, one per row."""
        max_errors, avg_errors = [], []
        for i in range(1, 4):
            # row i of the tensor, transformed to position space
            for mu in range(3):
                fft.idft(hij[tensor_id(i, mu + 1)], vector_x[mu])

            derivs.divergence(queue, vector_x, div)

            # the three individual terms of the divergence set the error scale
            for mu, key in enumerate(deriv_keys):
                derivs(queue, fx=vector_x[mu], **{key: pdx[mu]})

            norm = sum(clm.fabs(pdx[mu]) for mu in range(3))
            abs_div = clm.fabs(div)

            max_errors.append(cla.max(abs_div) / cla.max(norm))
            avg_errors.append(cla.sum(abs_div) / cla.sum(norm))

        return np.array(max_errors), np.array(avg_errors)

    if dtype == np.float64:
        max_rtol, avg_rtol = 1e-11, 1e-13
    else:
        max_rtol, avg_rtol = 1e-4, 1e-5

    def get_trace_errors(hij_h):
        """Return (max, mean) of |trace| relative to the diagonal's norm."""
        diagonal = [hij_h[tensor_id(i, i)] for i in range(1, 4)]
        trace = sum(diagonal)
        norm = np.sqrt(sum(np.abs(entry)**2 for entry in diagonal))

        nonzero = norm != 0
        relative = np.abs(trace[nonzero]) / norm[nonzero]
        # entries with relative trace >= .9 are excluded from the statistics
        relative = relative[relative < .9]
        return np.max(relative), np.sum(relative) / relative.size

    k_shape = fft.shape(True)
    hij = cla.empty(queue, shape=(6, *k_shape), dtype=cdtype)
    for mu in range(6):
        hij[mu] = make_data(queue, fft).astype(cdtype)

    # --- transverse-traceless projection (in place) ---
    project.transverse_traceless(queue, hij)
    hij_h = hij.get()

    if isinstance(fft, gDFT):
        assert all(is_hermitian(hij_h[i]) for i in range(6)), (
            f"TT projection is non-hermitian for {grid_shape=}, {h=}"
        )

    max_err, avg_err = get_divergence_errors(hij)
    assert all(max_err < max_rtol) and all(avg_err < avg_rtol), (
        f"TT projection not transverse for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    max_err, avg_err = get_trace_errors(hij_h)
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"TT projected tensor isn't traceless for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    # --- polarizations -> tensor ---
    plus = make_data(queue, fft).astype(cdtype)
    minus = make_data(queue, fft).astype(cdtype)
    project.pol_to_tensor(queue, plus, minus, hij)

    if isinstance(fft, gDFT):
        assert all(is_hermitian(hij[i]) for i in range(6)), (
            f"pol->tensor is non-hermitian for {grid_shape=}, {h=}"
        )

    max_err, avg_err = get_divergence_errors(hij)
    assert all(max_err < max_rtol) and all(avg_err < avg_rtol), (
        f"pol->tensor not transverse for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    hij_h = hij.get()
    max_err, avg_err = get_trace_errors(hij_h)
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"pol->tensor isn't traceless for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    # projecting the pol_to_tensor output again should reproduce it
    reprojected = cla.zeros_like(hij)
    project.transverse_traceless(queue, hij, reprojected)
    reprojected_h = reprojected.get()

    max_err, avg_err = get_errs(hij_h, reprojected_h)
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"pol->tensor != its own TT projection for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    # --- tensor -> polarizations, out of place ---
    plus_back = cla.zeros_like(plus)
    minus_back = cla.zeros_like(minus)
    project.tensor_to_pol(queue, plus_back, minus_back, hij)

    if isinstance(fft, gDFT):
        assert is_hermitian(plus_back) and is_hermitian(minus_back), (
            f"polarizations aren't hermitian for {grid_shape=}, {h=}"
        )

    max_err, avg_err = get_errs(plus_back.get(), plus.get())
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"pol->tensor->pol (plus) is not identity for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    max_err, avg_err = get_errs(minus_back.get(), minus.get())
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"pol->tensor->pol (minus) is not identity for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    # --- tensor -> polarizations, outputs aliasing the input tensor ---
    project.tensor_to_pol(queue, hij[0], hij[1], hij)

    max_err, avg_err = get_errs(plus_back.get(), hij[0].get())
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"in-place pol->tensor->pol (plus) not identity for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    max_err, avg_err = get_errs(minus_back.get(), hij[1].get())
    assert max_err < max_rtol and avg_err < avg_rtol, (
        f"in-place pol->tensor->pol (minus) not identity for {grid_shape=}, {h=}"
        f": {max_err=}, {avg_err=}"
    )

    if timing:
        from common import timer

        ntime = 10
        benchmarks = (
            ("TT projection",
             lambda: project.transverse_traceless(queue, hij)),
            ("pol->tensor",
             lambda: project.pol_to_tensor(queue, plus, minus, hij)),
            ("tensor->pol",
             lambda: project.tensor_to_pol(queue, plus, minus, hij)),
        )
        for label, call in benchmarks:
            t = timer(call, ntime=ntime)
            print(f"{label} took {t:.3f} ms for {grid_shape=}")
def test_spectral_poisson(ctx_factory, grid_shape, proc_shape, h, dtype,
                          timing=False):
    """Check ``ps.SpectralPoissonSolver`` by differentiating its solution.

    For each ``m_squared`` in ``(0, 1.2, 19.2)`` the solver is applied to a
    zero-mean random source ``rho``; the Laplacian of the result is then
    computed and the residual ``lap(fx) - rho - m_squared * fx`` is compared
    to the magnitude of ``rho``.

    :arg ctx_factory: callable returning an OpenCL context; when falsy,
        ``ps.choose_device_and_make_context`` is used instead.
    :arg grid_shape: grid shape handed to ``ps.DomainDecomposition`` and
        ``ps.DFT``.
    :arg proc_shape: handed to ``ps.DomainDecomposition``.
    :arg h: halo width.  ``h == 0`` uses eigenvalues ``-k**2`` with
        ``ps.SpectralCollocator``; otherwise ``SecondCenteredDifference(h)``
        eigenvalues with ``ps.FiniteDifferencer``.
    :arg dtype: dtype of the position-space arrays.
    :arg timing: when true, time one solver call and print it on rank 0.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)
    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = (3, 5, 7)
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))
    dk = tuple(2 * np.pi / Li for Li in L)

    if h == 0:
        def get_evals_2(k, dx):
            return - k**2

        derivs = ps.SpectralCollocator(fft, dk)
    else:
        from pystella.derivs import SecondCenteredDifference
        get_evals_2 = SecondCenteredDifference(h).get_eigenvalues
        derivs = ps.FiniteDifferencer(mpi, h, dx, stream=False)

    solver = ps.SpectralPoissonSolver(fft, dk, dx, get_evals_2)

    # rank-local shape padded by h on both sides of every axis
    pencil_shape = tuple(ni + 2*h for ni in rank_shape)

    # np.prod, not np.product: the latter alias was removed in NumPy 2.0
    statistics = ps.FieldStatistics(mpi, 0, rank_shape=rank_shape,
                                    grid_size=np.prod(grid_shape))

    fx = cla.empty(queue, pencil_shape, dtype)
    rho = clr.rand(queue, rank_shape, dtype)
    # subtract the mean from the source before solving
    rho -= statistics(rho)["mean"]
    lap = cla.empty(queue, rank_shape, dtype)
    rho_h = rho.get()

    # tolerances do not depend on m_squared, so set them once
    max_rtol = 1e-12 if dtype == np.float64 else 1e-4
    avg_rtol = 1e-13 if dtype == np.float64 else 1e-5

    for m_squared in (0, 1.2, 19.2):
        solver(queue, fx, rho, m_squared=m_squared)
        fx_h = fx.get()
        if h > 0:
            # drop the halo layers so fx_h matches the shape of lap and rho
            fx_h = fx_h[h:-h, h:-h, h:-h]

        derivs(queue, fx=fx, lap=lap)

        diff = np.fabs(lap.get() - rho_h - m_squared * fx_h)
        max_err = np.max(diff) / cla.max(clm.fabs(rho))
        avg_err = np.sum(diff) / cla.sum(clm.fabs(rho))

        assert max_err < max_rtol and avg_err < avg_rtol, \
            f"solution inaccurate for {h=}, {grid_shape=}, {proc_shape=}"

    if timing:
        from common import timer

        # m_squared still holds the last value from the loop above
        elapsed = timer(lambda: solver(queue, fx, rho, m_squared=m_squared),
                        ntime=10)

        if mpi.rank == 0:
            print(f"poisson took {elapsed:.3f} ms for {grid_shape=}, {proc_shape=}")
def test_gradient_laplacian(ctx_factory, grid_shape, proc_shape, h, dtype,
                            stream, timing=False):
    """Check gradient, Laplacian and divergence against a plane wave.

    The input field is ``sin(k . x)`` with ``k = dk * (-5, 4, -3)``.  The
    expected derivatives are built from the eigenvalue functions
    ``get_evals_1`` / ``get_evals_2`` applied to ``k``, and compared with the
    output of the derivative object wherever the expected value exceeds
    ``1e-11`` in magnitude.

    :arg ctx_factory: callable returning an OpenCL context; when falsy,
        ``ps.choose_device_and_make_context`` is used instead, matching the
        other tests in this file.
    :arg grid_shape: grid shape handed to ``ps.DomainDecomposition``.
    :arg proc_shape: handed to ``ps.DomainDecomposition``.
    :arg h: halo width.  ``h == 0`` uses ``ps.SpectralCollocator``; otherwise
        ``ps.FiniteDifferencer`` with centered-difference eigenvalues.
    :arg dtype: dtype of the position-space arrays.
    :arg stream: passed as ``stream=`` to ``ps.FiniteDifferencer`` when true;
        the test is skipped for ``h == 0`` or if construction fails.
    :arg timing: when true, time each derivative call and print on rank 0.
    """
    if h == 0 and stream is True:
        pytest.skip("no streaming spectral")

    # fall back to automatic device selection, like the sibling tests do
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, start = mpi.get_rank_shape_start(grid_shape)

    L = (3, 5, 7)
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))
    dk = tuple(2 * np.pi / Li for Li in L)

    if h == 0:
        def get_evals_1(k, dx):
            return k

        def get_evals_2(k, dx):
            return -k**2

        fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)
        derivs = ps.SpectralCollocator(fft, dk)
    else:
        from pystella.derivs import FirstCenteredDifference, SecondCenteredDifference
        get_evals_1 = FirstCenteredDifference(h).get_eigenvalues
        get_evals_2 = SecondCenteredDifference(h).get_eigenvalues
        if stream:
            try:
                derivs = ps.FiniteDifferencer(mpi, h, dx, rank_shape=rank_shape,
                                              stream=stream)
            except Exception:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt/SystemExit are not turned into a skip
                pytest.skip("StreamingStencil unavailable")
        else:
            derivs = ps.FiniteDifferencer(mpi, h, dx, rank_shape=rank_shape)

    # rank-local shape padded by h on both sides of every axis
    pencil_shape = tuple(ni + 2 * h for ni in rank_shape)

    # set up test data
    fx_h = np.empty(pencil_shape, dtype)
    kvec = np.array(dk) * np.array([-5, 4, -3]).astype(dtype)
    xvec = np.meshgrid(*[
        dxi * np.arange(si, si + ni)
        for dxi, si, ni in zip(dx, start, rank_shape)
    ], indexing="ij")

    phases = sum(ki * xi for ki, xi in zip(kvec, xvec))
    fx_sin = np.sin(phases)
    fx_cos = np.cos(phases)
    if h > 0:
        # only the interior is filled; the halo layers are left as allocated
        fx_h[h:-h, h:-h, h:-h] = fx_sin
    else:
        fx_h[:] = fx_sin

    fx = cla.to_device(queue, fx_h)

    lap = cla.empty(queue, rank_shape, dtype)
    grd = cla.empty(queue, (3, ) + rank_shape, dtype)
    derivs(queue, fx=fx, lap=lap, grd=grd)

    eff_kmag_sq = sum(
        get_evals_2(kvec_i, dxi) for dxi, kvec_i in zip(dx, kvec))
    lap_true = eff_kmag_sq * fx_sin

    max_rtol = 1e-9 if dtype == np.float64 else 3e-4
    avg_rtol = 1e-11 if dtype == np.float64 else 5e-5

    # filter small values dominated by round-off error
    mask = np.abs(lap_true) > 1e-11
    max_err, avg_err = get_errs(lap_true[mask], lap.get()[mask])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"lap inaccurate for {h=}, {grid_shape=}, {proc_shape=}:" \
        f" {max_err=}, {avg_err=}"

    for i in range(3):
        eff_k = get_evals_1(kvec[i], dx[i])
        pdi_true = eff_k * fx_cos

        # filter small values dominated by round-off error
        mask = np.abs(pdi_true) > 1e-11
        max_err, avg_err = get_errs(pdi_true[mask], grd[i].get()[mask])
        assert max_err < max_rtol and avg_err < avg_rtol, \
            f"pd{i} inaccurate for {h=}, {grid_shape=}, {proc_shape=}:" \
            f" {max_err=}, {avg_err=}"

    # a vector whose three components all equal fx, so that its divergence
    # should match the sum of the gradient components computed above
    vec = cla.empty(queue, (3, ) + pencil_shape, dtype)
    for mu in range(3):
        vec[mu] = fx

    div = cla.empty(queue, rank_shape, dtype)
    derivs.divergence(queue, vec, div)
    div_true = sum(grd[i] for i in range(3)).get()

    # filter small values dominated by round-off error
    mask = np.abs(div_true) > 1e-11
    max_err, avg_err = get_errs(div_true[mask], div.get()[mask])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"div inaccurate for {h=}, {grid_shape=}, {proc_shape=}:" \
        f" {max_err=}, {avg_err=}"

    if timing:
        from common import timer

        base_args = {"queue": queue, "fx": fx}
        div_args = {"queue": queue, "vec": vec, "div": div}
        if h == 0:
            import pyopencl.tools as clt
            pool = clt.MemoryPool(clt.ImmediateAllocator(queue))
            base_args["allocator"] = pool
            div_args["allocator"] = pool

        times = {}
        times["gradient and laplacian"] = timer(
            lambda: derivs(lap=lap, grd=grd, **base_args))
        times["gradient"] = timer(lambda: derivs(grd=grd, **base_args))
        times["laplacian"] = timer(lambda: derivs(lap=lap, **base_args))
        times["pdx"] = timer(lambda: derivs(pdx=grd[0], **base_args))
        times["pdy"] = timer(lambda: derivs(pdy=grd[1], **base_args))
        times["pdz"] = timer(lambda: derivs(pdz=grd[2], **base_args))
        times["divergence"] = timer(lambda: derivs.divergence(**div_args))

        if mpi.rank == 0:
            print(f"{grid_shape=}, {h=}, {proc_shape=}")
            for key, val in times.items():
                print(f"{key} took {val:.3f} ms")
# couplings multiplying the phi * chi**2 and chi**4 terms of the potential
sigma = 0.
lambda4 = 0.

f0 = [.193 * mpl, 0]  # units of mpl
df0 = [-.142231 * mpl, 0]  # units of mpl

end_time = 1
end_scale_factor = 20
Stepper = ps.LowStorageRK54
gravitational_waves = True  # whether to simulate gravitational waves

# OpenCL context and command queue
ctx = ps.choose_device_and_make_context()
queue = cl.CommandQueue(ctx)

decomp = ps.DomainDecomposition(proc_shape, halo_shape, rank_shape)
fft = ps.DFT(decomp, ctx, queue, grid_shape, dtype)

# without halos use spectral derivatives, otherwise finite differences
derivs = (
    ps.SpectralCollocator(fft, dk)
    if halo_shape == 0
    else ps.FiniteDifferencer(decomp, halo_shape, dx, rank_shape=rank_shape)
)


def potential(f):
    """Return the potential of the two fields in *f*, divided by ``mphi**2``.

    ``f[0]`` is used as ``phi`` and ``f[1]`` as ``chi``.
    """
    phi = f[0]
    chi = f[1]

    phi_mass_term = mphi**2 / 2 * phi**2
    chi_mass_term = mchi**2 / 2 * chi**2
    quartic_interaction = gsq / 2 * phi**2 * chi**2
    trilinear_interaction = sigma / 2 * phi * chi**2
    chi_self_interaction = lambda4 / 4 * chi**4

    # summed left to right in the same order as the terms are listed
    unscaled = (phi_mass_term
                + chi_mass_term
                + quartic_interaction
                + trilinear_interaction
                + chi_self_interaction)
    return unscaled / mphi**2