Exemplo n.º 1
0
def test_reduction(ctx_factory,
                   grid_shape,
                   proc_shape,
                   dtype,
                   op,
                   _grid_shape,
                   pass_grid_dims,
                   timing=False):
    """Compare a ``ps.Reduction`` result with ``Reduction.reduce_array``.

    The reducer evaluates the temporary ``x = f / 2 + .31`` and reduces ``x``
    with ``op``; the result is stored under the key ``"avg"`` whatever ``op``
    is.  The reference value is ``reducer.reduce_array(f / 2 + .31, op)``,
    divided by the total number of grid points when ``op == "avg"``.

    :arg ctx_factory: callable returning a context; when falsy, the context
        comes from ``ps.choose_device_and_make_context()``.
    :arg grid_shape: grid shape, used unless ``_grid_shape`` is truthy.
    :arg proc_shape: process-grid shape given to ``ps.DomainDecomposition``.
    :arg dtype: dtype of the random input array; ``np.float64`` selects the
        tight tolerance (``5e-14``), anything else uses ``1e-5``.
    :arg op: reduction operation forwarded to the reducer and ``reduce_array``.
    :arg _grid_shape: optional override for ``grid_shape``.
    :arg pass_grid_dims: when true, ``rank_shape`` and ``grid_size`` are passed
        explicitly to ``ps.Reduction``; otherwise they are omitted.
    :arg timing: when true, time 1000 reducer calls and print the result and
        the derived bandwidth on rank 0.
    """
    ctx = ctx_factory() if ctx_factory else ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    grid_shape = _grid_shape or grid_shape
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)

    # np.prod, not np.product: the latter was a deprecated alias that NumPy 2.0
    # removed.  Computed once and reused for both the reducer and the check.
    grid_size = np.prod(grid_shape)

    from pymbolic import var
    from pystella import Field
    tmp_insns = [(var("x"), Field("f") / 2 + .31)]

    reducers = {"avg": [(var("x"), op)]}

    if pass_grid_dims:
        reducer = ps.Reduction(mpi,
                               reducers,
                               rank_shape=rank_shape,
                               tmp_instructions=tmp_insns,
                               grid_size=grid_size)
    else:
        reducer = ps.Reduction(mpi, reducers, tmp_instructions=tmp_insns)

    f = clr.rand(queue, rank_shape, dtype=dtype)

    import pyopencl.tools as clt
    pool = clt.MemoryPool(clt.ImmediateAllocator(queue))

    result = reducer(queue, f=f, allocator=pool)
    avg = result["avg"]

    # reference value: same expression as tmp_insns, reduced directly
    avg_test = reducer.reduce_array(f / 2 + .31, op)
    if op == "avg":
        avg_test /= grid_size

    rtol = 5e-14 if dtype == np.float64 else 1e-5
    assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
        f"{op} reduction innaccurate for {grid_shape=}, {proc_shape=}"

    if timing:
        from common import timer
        t = timer(lambda: reducer(queue, f=f, allocator=pool), ntime=1000)
        if mpi.rank == 0:
            print(
                f"reduction took {t:.3f} ms for {grid_shape=}, {proc_shape=}")
            # bytes -> GiB, and t is printed as milliseconds, hence * 1000
            bandwidth = f.nbytes / 1024**3 / t * 1000
            print(f"Bandwidth = {bandwidth:.1f} GB/s")
Exemplo n.º 2
0
def test_reduction_with_new_shape(ctx_factory,
                                  grid_shape,
                                  proc_shape,
                                  dtype,
                                  op,
                                  _grid_shape,
                                  timing=False):
    """Check that one ``ps.Reduction`` instance works for two array shapes.

    A reducer built without an explicit ``rank_shape`` or ``grid_size`` is
    called first on an array for the full grid and then on an array for a grid
    with every dimension halved.  Each result (stored under the key ``"avg"``)
    is compared with ``reducer.reduce_array(f, op)``, divided by the number of
    grid points when ``op == "avg"``.

    :arg ctx_factory: callable returning a context; when falsy, the context
        comes from ``ps.choose_device_and_make_context()``.
    :arg grid_shape: grid shape, used unless ``_grid_shape`` is truthy.
    :arg proc_shape: process-grid shape given to ``ps.DomainDecomposition``.
    :arg dtype: dtype of the random input arrays; ``np.float64`` selects the
        tight tolerance (``5e-14``), anything else uses ``1e-5``.
    :arg op: reduction operation forwarded to the reducer and ``reduce_array``.
    :arg _grid_shape: optional override for ``grid_shape``.
    :arg timing: accepted for signature parity with the other tests; unused.
    """
    ctx = ctx_factory() if ctx_factory else ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    grid_shape = _grid_shape or grid_shape
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)

    from pystella import Field
    reducers = {"avg": [(Field("f"), op)]}

    reducer = ps.Reduction(mpi, reducers)

    rtol = 5e-14 if dtype == np.float64 else 1e-5

    def _check(grid_shape, label):
        # Run the shared reducer on a fresh random array for ``grid_shape`` and
        # compare against reduce_array; ``label`` only varies the message.
        rank_shape, _ = mpi.get_rank_shape_start(grid_shape)
        f = clr.rand(queue, rank_shape, dtype=dtype)
        avg = reducer(queue, f=f)["avg"]

        avg_test = reducer.reduce_array(f, op)
        if op == "avg":
            # np.prod, not np.product (alias removed in NumPy 2.0)
            avg_test /= np.prod(grid_shape)

        assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
            f"{op} reduction{label} innaccurate for {grid_shape=}, {proc_shape=}"

    _check(grid_shape, "")

    # test call to reducer with new shape
    _check(tuple(Ni // 2 for Ni in grid_shape), " w/new shape")
Exemplo n.º 3
0
def test_scalar_energy(ctx_factory,
                       grid_shape,
                       proc_shape,
                       h,
                       dtype,
                       timing=False):
    """Check the energy reduction of a two-field ``ps.ScalarSector``.

    Random ``f`` and ``dfdt`` arrays (padded by ``h`` on each side of every
    axis) and an unpadded ``lap`` array are passed to a ``ps.Reduction`` built
    from the scalar sector.  Its ``"kinetic"``, ``"gradient"`` and
    ``"potential"`` outputs are compared with values computed on the host from
    the unpadded interior of the same arrays.

    :arg ctx_factory: callable returning a context; when falsy, the context
        comes from ``ps.choose_device_and_make_context()``.
    :arg grid_shape: grid shape; the host-side checks index three axes.
    :arg proc_shape: process-grid shape given to ``ps.DomainDecomposition``.
    :arg h: integer halo width applied to every axis.
    :arg dtype: dtype of the random arrays; ``np.float64`` selects a tolerance
        of ``1e-14``, anything else ``1e-5``.
    :arg timing: when true, time the reduction and print the result on rank 0.
    """
    ctx = ctx_factory() if ctx_factory else ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)

    # np.prod, not np.product: the latter alias was removed in NumPy 2.0
    grid_size = np.prod(grid_shape)

    nscalars = 2

    def potential(f):
        phi, chi = f[0], f[1]
        return 1 / 2 * phi**2 + 1 / 2 * chi**2 + 1 / 2 * phi**2 * chi**2

    scalar_sector = ps.ScalarSector(nscalars, potential=potential)
    scalar_energy = ps.Reduction(mpi,
                                 scalar_sector,
                                 rank_shape=rank_shape,
                                 grid_size=grid_size,
                                 halo_shape=h)

    pencil_shape = tuple(ni + 2 * h for ni in rank_shape)
    f = clr.rand(queue, (nscalars, ) + pencil_shape, dtype)
    dfdt = clr.rand(queue, (nscalars, ) + pencil_shape, dtype)
    lap = clr.rand(queue, (nscalars, ) + rank_shape, dtype)

    energy = scalar_energy(queue, f=f, dfdt=dfdt, lap_f=lap, a=np.array(1.))

    # Slice selecting the unpadded interior of a padded array.  A plain
    # ``h:-h`` is empty for h == 0, so leave the upper bound open in that case.
    interior = (slice(h, -h or None), ) * 3

    # host copies of each field's interior, reused for gradient and potential
    f_int = [f[fld].get()[interior] for fld in range(nscalars)]

    kin_test = []
    grad_test = []
    for fld in range(nscalars):
        df_h = dfdt[fld].get()
        rank_sum = np.sum(df_h[interior]**2)
        kin_test.append(1 / 2 * mpi.allreduce(rank_sum) / grid_size)

        lap_h = lap[fld].get()

        rank_sum = np.sum(-f_int[fld] * lap_h)
        grad_test.append(1 / 2 * mpi.allreduce(rank_sum) / grid_size)

    energy_test = {
        "kinetic": np.array(kin_test),
        "gradient": np.array(grad_test),
    }

    pot_rank = np.sum(potential(f_int))
    energy_test["potential"] = np.array(mpi.allreduce(pot_rank) / grid_size)

    max_rtol = 1e-14 if dtype == np.float64 else 1e-5
    avg_rtol = 1e-14 if dtype == np.float64 else 1e-5

    for key, value in energy.items():
        max_err, avg_err = get_errs(value, energy_test[key])
        assert max_err < max_rtol and avg_err < avg_rtol, \
            f"{key} inaccurate for {nscalars=}, {grid_shape=}, {proc_shape=}" \
            f": {max_err=}, {avg_err=}"

    if timing:
        from common import timer
        t = timer(lambda: scalar_energy(
            queue, a=np.array(1.), f=f, dfdt=dfdt, lap_f=lap))
        if mpi.rank == 0:
            print(f"scalar energy took {t:.3f} "
                  f"ms for {nscalars=}, {grid_shape=}, {proc_shape=}")
Exemplo n.º 4
0

# Assemble the sectors handed to the stepper: the scalar sector is always
# present; a tensor-perturbation sector built from it is appended only when
# gravitational_waves is truthy.
scalar_sector = ps.ScalarSector(nscalars, potential=potential)
sectors = [scalar_sector]
if gravitational_waves:
    gw_sector = ps.TensorPerturbationSector([scalar_sector])
    sectors += [gw_sector]

# Instantiate the stepper over every sector assembled above.
stepper = Stepper(sectors, halo_shape=halo_shape, rank_shape=rank_shape, dt=dt)

# Create the energy computation function: a reduction over the scalar sector
# only (not the tensor sector), with get_rho_and_p supplied as its callback.
from pystella.sectors import get_rho_and_p

reduce_energy = ps.Reduction(decomp,
                             scalar_sector,
                             halo_shape=halo_shape,
                             callback=get_rho_and_p,
                             rank_shape=rank_shape,
                             grid_size=grid_size)


def compute_energy(f, dfdt, lap_f, dfdx, a):
    """Run ``derivs`` on ``f``, then return the ``reduce_energy`` result.

    ``derivs`` is always called with ``fx=f`` and ``lap=lap_f``; ``grd=dfdx``
    is added only when ``gravitational_waves`` is truthy.  The scale factor
    ``a`` is wrapped in ``np.array`` before being passed to the reduction.
    """
    deriv_args = {"fx": f, "lap": lap_f}
    if gravitational_waves:
        # gradient output is only requested when gravitational waves are on
        deriv_args["grd"] = dfdx
    derivs(queue, **deriv_args)

    scale_factor = np.array(a)
    return reduce_energy(queue, f=f, dfdt=dfdt, lap_f=lap_f, a=scale_factor)


# create output function
if decomp.rank == 0: