示例#1
0
def test_vector_projector(ctx_factory,
                          grid_shape,
                          proc_shape,
                          h,
                          dtype,
                          timing=False):
    """Check the vector routines of ``ps.Projector`` against derivatives.

    Exercised in order: ``transversify``, ``pol_to_vec``, ``transversify``
    applied to an already-projected vector, the ``pol_to_vec`` /
    ``vec_to_pol`` round trip (into fresh buffers and into slices of the
    input itself), and ``decompose_vector``.

    :arg ctx_factory: callable returning an OpenCL context; when falsy a
        context is obtained from ``ps.choose_device_and_make_context``.
    :arg grid_shape: number of grid points along each axis.
    :arg proc_shape: forwarded to ``ps.DomainDecomposition``.
    :arg h: halo width; ``h > 0`` selects finite differences, otherwise the
        spectral collocator is used.
    :arg dtype: real dtype of the position-space arrays.
    :arg timing: when true, additionally time each routine and print it.
    """
    ctx = (ctx_factory() if ctx_factory
           else ps.choose_device_and_make_context())
    queue = cl.CommandQueue(ctx)

    decomp = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = decomp.get_rank_shape_start(grid_shape)
    pencil_shape = tuple(n + 2 * h for n in rank_shape)

    box = (10, 8, 11.5)
    dx = tuple(length / npts for length, npts in zip(box, grid_shape))
    dk = tuple(2 * np.pi / length for length in box)

    fft = ps.DFT(decomp, ctx, queue, grid_shape, dtype)
    cdtype = fft.cdtype

    # stencil eigenvalues when differencing, the identity on k otherwise
    eigenvalues = (FirstCenteredDifference(h).get_eigenvalues if h > 0
                   else lambda k, dx: k)
    project = ps.Projector(fft, eigenvalues, dk, dx)
    derivs = (ps.FiniteDifferencer(decomp, h, dx) if h > 0
              else ps.SpectralCollocator(fft, dk))

    # scratch buffers shared by every divergence evaluation below
    vector_x = cla.empty(queue, (3,) + pencil_shape, dtype)
    div = cla.empty(queue, rank_shape, dtype)
    pdx = cla.empty(queue, (3,) + rank_shape, dtype)

    def get_divergence_error(vector):
        """Return (max, avg) of |div| relative to the summed |d_i v_i|."""
        for mu in range(3):
            fft.idft(vector[mu], vector_x[mu])

        derivs.divergence(queue, vector_x, div)

        # the individual terms of the divergence set the scale of the error
        for mu, out_name in enumerate(("pdx", "pdy", "pdz")):
            derivs(queue, fx=vector_x[mu], **{out_name: pdx[mu]})
        norm = sum(clm.fabs(pdx[mu]) for mu in range(3))

        return (cla.max(clm.fabs(div)) / cla.max(norm),
                cla.sum(clm.fabs(div)) / cla.sum(norm))

    is_double = dtype == np.float64
    max_rtol = 1e-11 if is_double else 1e-4
    avg_rtol = 1e-13 if is_double else 1e-5

    def check(max_err, avg_err, failure, max_tol=max_rtol, avg_tol=avg_rtol):
        """Assert both errors are below tolerance, reporting *failure*."""
        assert max_err < max_tol and avg_err < avg_tol, \
            f"{failure} for {grid_shape=}, {h=}: {max_err=}, {avg_err=}"

    k_shape = fft.shape(True)
    vector = cla.empty(queue, (3,) + k_shape, cdtype)
    for mu in range(3):
        vector[mu] = make_data(queue, fft).astype(cdtype)

    # in-place projection of random data must be divergence-free
    project.transversify(queue, vector)
    check(*get_divergence_error(vector), "transversify failed")

    # a vector assembled from two polarizations must be transverse as well
    plus = make_data(queue, fft).astype(cdtype)
    minus = make_data(queue, fft).astype(cdtype)
    project.pol_to_vec(queue, plus, minus, vector)

    if isinstance(fft, gDFT):
        assert all(is_hermitian(vector[i]) for i in range(3)), \
            f"pol->vec is non-hermitian for {grid_shape=}, {h=}"

    check(*get_divergence_error(vector), "pol_to_vec result not transverse")

    # projecting an already-transverse vector must leave it unchanged
    vector_h = vector.get()
    vector_2 = cla.zeros_like(vector)
    project.transversify(queue, vector, vector_2)
    check(*get_errs(vector_h, vector_2.get()),
          "pol->vector != its own transverse proj.")

    # round trip back to polarizations
    plus1 = cla.zeros_like(plus)
    minus1 = cla.zeros_like(minus)
    project.vec_to_pol(queue, plus1, minus1, vector)

    if isinstance(fft, gDFT):
        assert is_hermitian(plus1) and is_hermitian(minus1), \
            f"polarizations aren't hermitian for {grid_shape=}, {h=}"

    for sign, recovered, expected in (("plus", plus1, plus),
                                      ("minus", minus1, minus)):
        check(*get_errs(recovered.get(), expected.get()),
              f"pol->vec->pol ({sign}) is not identity")

    # same conversion, but writing the result over the input's own slices
    project.vec_to_pol(queue, vector[0], vector[1], vector)

    for sign, expected, slot in (("plus", plus1, 0), ("minus", minus1, 1)):
        check(*get_errs(expected.get(), vector[slot].get()),
              f"in-place pol->vec->pol ({sign}) not identity")

    # reset and test longitudinal component
    for mu in range(3):
        vector[mu] = make_data(queue, fft).astype(cdtype)
        fft.idft(vector[mu], vector_x[mu])

    longitudinal = cla.zeros_like(minus)
    project.decompose_vector(queue, vector, plus1, minus1, longitudinal)

    long_x = cla.empty(queue, pencil_shape, dtype)
    fft.idft(longitudinal, long_x)

    div_true = cla.empty(queue, rank_shape, dtype)
    derivs.divergence(queue, vector_x, div_true)

    # divergence of the gradient of the longitudinal part
    derivs(queue, fx=long_x, grd=pdx)
    div_long = cla.empty(queue, rank_shape, dtype)
    if h == 0:
        grad_long = pdx
    else:
        # the gradient has no halo padding; copy it into padded storage first
        grad_long = cla.empty(queue, (3,) + pencil_shape, dtype)
        for mu in range(3):
            decomp.restore_halos(queue, pdx[mu], grad_long[mu])
    derivs.divergence(queue, grad_long, div_long)

    check(*get_errs(div_true.get(), div_long.get()),
          "lap(longitudinal) != div vector", max_tol=1e-6, avg_tol=1e-11)

    if timing:
        from common import timer

        benchmarks = (
            ("transversify",
             lambda: project.transversify(queue, vector)),
            ("pol_to_vec",
             lambda: project.pol_to_vec(queue, plus, minus, vector)),
            ("vec_to_pol",
             lambda: project.vec_to_pol(queue, plus, minus, vector)),
            ("decompose_vector",
             lambda: project.decompose_vector(queue, vector, plus, minus,
                                              longitudinal)),
        )
        for label, call in benchmarks:
            t = timer(call, ntime=10)
            print(f"{label} took {t:.3f} ms for {grid_shape=}")
示例#2
0
def test_tensor_projector(ctx_factory,
                          grid_shape,
                          proc_shape,
                          h,
                          dtype,
                          timing=False):
    """Check the tensor routines of ``ps.Projector``.

    Exercised in order: ``transverse_traceless``, ``pol_to_tensor``,
    ``transverse_traceless`` applied to an already-projected tensor, and the
    ``pol_to_tensor`` / ``tensor_to_pol`` round trip (into fresh buffers and
    into slices of the input itself).

    :arg ctx_factory: callable returning an OpenCL context; when falsy a
        context is obtained from ``ps.choose_device_and_make_context``.
    :arg grid_shape: number of grid points along each axis.
    :arg proc_shape: forwarded to ``ps.DomainDecomposition``.
    :arg h: halo width; ``h > 0`` selects finite differences, otherwise the
        spectral collocator is used.
    :arg dtype: real dtype of the position-space arrays.
    :arg timing: when true, additionally time each routine and print it.
    """
    ctx = (ctx_factory() if ctx_factory
           else ps.choose_device_and_make_context())
    queue = cl.CommandQueue(ctx)

    decomp = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = decomp.get_rank_shape_start(grid_shape)
    pencil_shape = tuple(n + 2 * h for n in rank_shape)

    box = (10, 8, 11.5)
    dx = tuple(length / npts for length, npts in zip(box, grid_shape))
    dk = tuple(2 * np.pi / length for length in box)

    fft = ps.DFT(decomp, ctx, queue, grid_shape, dtype)
    cdtype = fft.cdtype

    # stencil eigenvalues when differencing, the identity on k otherwise
    eigenvalues = (FirstCenteredDifference(h).get_eigenvalues if h > 0
                   else lambda k, dx: k)
    project = ps.Projector(fft, eigenvalues, dk, dx)
    derivs = (ps.FiniteDifferencer(decomp, h, dx) if h > 0
              else ps.SpectralCollocator(fft, dk))

    # scratch buffers shared by every divergence evaluation below
    vector_x = cla.empty(queue, (3,) + pencil_shape, dtype)
    div = cla.empty(queue, rank_shape, dtype)
    pdx = cla.empty(queue, (3,) + rank_shape, dtype)

    def get_divergence_errors(hij):
        """Return per-row arrays of (max, avg) relative divergence error."""
        max_errors, avg_errors = [], []
        for row in (1, 2, 3):
            # components (row, 1..3) are treated as one vector
            for col in (1, 2, 3):
                fft.idft(hij[tensor_id(row, col)], vector_x[col - 1])

            derivs.divergence(queue, vector_x, div)

            # the individual terms of the divergence set the error scale
            for mu, out_name in enumerate(("pdx", "pdy", "pdz")):
                derivs(queue, fx=vector_x[mu], **{out_name: pdx[mu]})
            norm = sum(clm.fabs(pdx[mu]) for mu in range(3))

            max_errors.append(cla.max(clm.fabs(div)) / cla.max(norm))
            avg_errors.append(cla.sum(clm.fabs(div)) / cla.sum(norm))

        return np.array(max_errors), np.array(avg_errors)

    is_double = dtype == np.float64
    max_rtol = 1e-11 if is_double else 1e-4
    avg_rtol = 1e-13 if is_double else 1e-5

    def get_trace_errors(hij_h):
        """Return (max, mean) of |trace| relative to the diagonal's norm."""
        diagonal = [hij_h[tensor_id(i, i)] for i in range(1, 4)]
        trace = sum(diagonal)
        norm = np.sqrt(sum(np.abs(entry)**2 for entry in diagonal))

        nonzero = norm != 0
        relative = np.abs(trace[nonzero]) / norm[nonzero]
        # ratios of 0.9 and above are excluded from the statistics
        # NOTE(review): presumably these are modes the projector does not
        # act on -- confirm against the Projector implementation
        relative = relative[relative < .9]
        return np.max(relative), np.sum(relative) / relative.size

    def check(max_err, avg_err, failure):
        """Assert scalar errors are below tolerance, reporting *failure*."""
        assert max_err < max_rtol and avg_err < avg_rtol, \
            f"{failure} for {grid_shape=}, {h=}: {max_err=}, {avg_err=}"

    def check_all(max_err, avg_err, failure):
        """Assert every entry of both error arrays is below tolerance."""
        assert all(max_err < max_rtol) and all(avg_err < avg_rtol), \
            f"{failure} for {grid_shape=}, {h=}: {max_err=}, {avg_err=}"

    k_shape = fft.shape(True)
    hij = cla.empty(queue, shape=(6,) + k_shape, dtype=cdtype)
    for mu in range(6):
        hij[mu] = make_data(queue, fft).astype(cdtype)

    # in-place TT projection of random data
    project.transverse_traceless(queue, hij)
    hij_h = hij.get()

    if isinstance(fft, gDFT):
        assert all(is_hermitian(hij_h[i]) for i in range(6)), \
            f"TT projection is non-hermitian for {grid_shape=}, {h=}"

    check_all(*get_divergence_errors(hij), "TT projection not transverse")
    check(*get_trace_errors(hij_h), "TT projected tensor isn't traceless")

    # a tensor assembled from two polarizations must be TT as well
    plus = make_data(queue, fft).astype(cdtype)
    minus = make_data(queue, fft).astype(cdtype)
    project.pol_to_tensor(queue, plus, minus, hij)

    if isinstance(fft, gDFT):
        assert all(is_hermitian(hij[i]) for i in range(6)), \
            f"pol->tensor is non-hermitian for {grid_shape=}, {h=}"

    check_all(*get_divergence_errors(hij), "pol->tensor not transverse")

    hij_h = hij.get()
    check(*get_trace_errors(hij_h), "pol->tensor isn't traceless")

    # projecting an already-TT tensor must leave it unchanged
    hij_2 = cla.zeros_like(hij)
    project.transverse_traceless(queue, hij, hij_2)
    check(*get_errs(hij_h, hij_2.get()),
          "pol->tensor != its own TT projection")

    # round trip back to polarizations
    plus1 = cla.zeros_like(plus)
    minus1 = cla.zeros_like(minus)
    project.tensor_to_pol(queue, plus1, minus1, hij)

    if isinstance(fft, gDFT):
        assert is_hermitian(plus1) and is_hermitian(minus1), \
            f"polarizations aren't hermitian for {grid_shape=}, {h=}"

    for sign, recovered, expected in (("plus", plus1, plus),
                                      ("minus", minus1, minus)):
        check(*get_errs(recovered.get(), expected.get()),
              f"pol->tensor->pol ({sign}) is not identity")

    # same conversion, but writing the result over the input's own slices
    project.tensor_to_pol(queue, hij[0], hij[1], hij)

    for sign, expected, slot in (("plus", plus1, 0), ("minus", minus1, 1)):
        check(*get_errs(expected.get(), hij[slot].get()),
              f"in-place pol->tensor->pol ({sign}) not identity")

    if timing:
        from common import timer

        benchmarks = (
            ("TT projection",
             lambda: project.transverse_traceless(queue, hij)),
            ("pol->tensor",
             lambda: project.pol_to_tensor(queue, plus, minus, hij)),
            ("tensor->pol",
             lambda: project.tensor_to_pol(queue, plus, minus, hij)),
        )
        for label, call in benchmarks:
            t = timer(call, ntime=10)
            print(f"{label} took {t:.3f} ms for {grid_shape=}")
示例#3
0
def test_spectral_poisson(ctx_factory, grid_shape, proc_shape, h, dtype,
                          timing=False):
    """Check ``ps.SpectralPoissonSolver`` by substituting its solution back.

    For several values of ``m_squared`` the solver output ``fx`` is
    differentiated with the matching derivative operator, and the residual
    ``lap(fx) - rho - m_squared * fx`` is required to be small relative to
    ``rho``.

    :arg ctx_factory: callable returning an OpenCL context; when falsy a
        context is obtained from ``ps.choose_device_and_make_context``.
    :arg grid_shape: number of grid points along each axis.
    :arg proc_shape: forwarded to ``ps.DomainDecomposition``.
    :arg h: halo width; ``h == 0`` selects spectral derivatives, otherwise
        centered finite differences are used.
    :arg dtype: real dtype of the position-space arrays.
    :arg timing: when true, additionally time one solve and print it on
        rank 0.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)
    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = (3, 5, 7)
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))
    dk = tuple(2 * np.pi / Li for Li in L)

    # the solver must use the Laplacian eigenvalues of the same operator that
    # is later used to verify its solution
    if h == 0:
        def get_evals_2(k, dx):
            return - k**2

        derivs = ps.SpectralCollocator(fft, dk)
    else:
        from pystella.derivs import SecondCenteredDifference
        get_evals_2 = SecondCenteredDifference(h).get_eigenvalues
        derivs = ps.FiniteDifferencer(mpi, h, dx, stream=False)

    solver = ps.SpectralPoissonSolver(fft, dk, dx, get_evals_2)

    pencil_shape = tuple(ni + 2*h for ni in rank_shape)

    # np.prod rather than np.product: the latter was a deprecated alias and
    # no longer exists in NumPy 2.0
    statistics = ps.FieldStatistics(mpi, 0, rank_shape=rank_shape,
                                    grid_size=np.prod(grid_shape))

    fx = cla.empty(queue, pencil_shape, dtype)
    rho = clr.rand(queue, rank_shape, dtype)
    # work with a zero-mean source
    rho -= statistics(rho)["mean"]
    lap = cla.empty(queue, rank_shape, dtype)
    rho_h = rho.get()

    # tolerances depend only on the precision, not on m_squared
    max_rtol = 1e-12 if dtype == np.float64 else 1e-4
    avg_rtol = 1e-13 if dtype == np.float64 else 1e-5

    for m_squared in (0, 1.2, 19.2):
        solver(queue, fx, rho, m_squared=m_squared)
        fx_h = fx.get()
        if h > 0:
            # drop the halo layers so fx_h matches the shape of lap and rho
            fx_h = fx_h[h:-h, h:-h, h:-h]

        derivs(queue, fx=fx, lap=lap)

        diff = np.fabs(lap.get() - rho_h - m_squared * fx_h)
        max_err = np.max(diff) / cla.max(clm.fabs(rho))
        avg_err = np.sum(diff) / cla.sum(clm.fabs(rho))

        assert max_err < max_rtol and avg_err < avg_rtol, \
            f"solution inaccurate for {h=}, {grid_shape=}, {proc_shape=}"

    if timing:
        from common import timer
        # m_squared still holds the last value tried in the loop above
        elapsed = timer(lambda: solver(queue, fx, rho, m_squared=m_squared),
                        ntime=10)

        if mpi.rank == 0:
            print(f"poisson took {elapsed:.3f} ms for {grid_shape=}, "
                  f"{proc_shape=}")
示例#4
0
def test_gradient_laplacian(ctx_factory,
                            grid_shape,
                            proc_shape,
                            h,
                            dtype,
                            stream,
                            timing=False):
    """Check gradient, Laplacian and divergence of a plane wave.

    The input is ``sin(k . x)`` for a fixed wavevector. Computed derivatives
    are compared with the input (or its cosine counterpart) scaled by the
    eigenvalues of the derivative operator in use.

    :arg ctx_factory: callable returning an OpenCL context; when falsy a
        context is obtained from ``ps.choose_device_and_make_context``.
    :arg grid_shape: number of grid points along each axis.
    :arg proc_shape: forwarded to ``ps.DomainDecomposition``.
    :arg h: halo width; ``h == 0`` selects spectral derivatives, otherwise
        centered finite differences are used.
    :arg dtype: real dtype of the position-space arrays.
    :arg stream: passed to ``ps.FiniteDifferencer`` as ``stream``; the test
        is skipped when it is ``True`` and ``h == 0``.
    :arg timing: when true, additionally time each derivative call and print
        the results on rank 0.
    """
    if h == 0 and stream is True:
        pytest.skip("no streaming spectral")

    # fall back to automatic device selection when no factory is supplied
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, start = mpi.get_rank_shape_start(grid_shape)

    L = (3, 5, 7)
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))
    dk = tuple(2 * np.pi / Li for Li in L)

    if h == 0:

        def get_evals_1(k, dx):
            return k

        def get_evals_2(k, dx):
            return -k**2

        fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)
        derivs = ps.SpectralCollocator(fft, dk)
    else:
        from pystella.derivs import FirstCenteredDifference, SecondCenteredDifference
        get_evals_1 = FirstCenteredDifference(h).get_eigenvalues
        get_evals_2 = SecondCenteredDifference(h).get_eigenvalues
        if stream:
            try:
                derivs = ps.FiniteDifferencer(mpi,
                                              h,
                                              dx,
                                              rank_shape=rank_shape,
                                              stream=stream)
            except Exception:
                # any ordinary construction failure means "unavailable";
                # KeyboardInterrupt/SystemExit are deliberately not caught
                pytest.skip("StreamingStencil unavailable")
        else:
            derivs = ps.FiniteDifferencer(mpi, h, dx, rank_shape=rank_shape)

    pencil_shape = tuple(ni + 2 * h for ni in rank_shape)

    # set up test data
    fx_h = np.empty(pencil_shape, dtype)
    kvec = np.array(dk) * np.array([-5, 4, -3]).astype(dtype)
    # coordinates of the grid points owned by this rank
    xvec = np.meshgrid(*[
        dxi * np.arange(si, si + ni)
        for dxi, si, ni in zip(dx, start, rank_shape)
    ],
                       indexing="ij")

    phases = sum(ki * xi for ki, xi in zip(kvec, xvec))
    if h > 0:
        # only the interior is filled; the halo layers are left unset here
        fx_h[h:-h, h:-h, h:-h] = np.sin(phases)
    else:
        fx_h[:] = np.sin(phases)
    fx_cos = np.cos(phases)

    fx = cla.to_device(queue, fx_h)

    lap = cla.empty(queue, rank_shape, dtype)
    grd = cla.empty(queue, (3, ) + rank_shape, dtype)
    derivs(queue, fx=fx, lap=lap, grd=grd)

    # expected Laplacian: sum of per-axis second-derivative eigenvalues
    eff_kmag_sq = sum(
        get_evals_2(kvec_i, dxi) for dxi, kvec_i in zip(dx, kvec))
    lap_true = eff_kmag_sq * np.sin(phases)

    max_rtol = 1e-9 if dtype == np.float64 else 3e-4
    avg_rtol = 1e-11 if dtype == np.float64 else 5e-5

    # filter small values dominated by round-off error
    mask = np.abs(lap_true) > 1e-11
    max_err, avg_err = get_errs(lap_true[mask], lap.get()[mask])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"lap inaccurate for {h=}, {grid_shape=}, {proc_shape=}:" \
        f" {max_err=}, {avg_err=}"

    for i in range(3):
        # expected partial derivative along axis i
        eff_k = get_evals_1(kvec[i], dx[i])
        pdi_true = eff_k * fx_cos

        # filter small values dominated by round-off error
        mask = np.abs(pdi_true) > 1e-11
        max_err, avg_err = get_errs(pdi_true[mask], grd[i].get()[mask])
        assert max_err < max_rtol and avg_err < avg_rtol, \
            f"pd{i} inaccurate for {h=}, {grid_shape=}, {proc_shape=}:" \
            f" {max_err=}, {avg_err=}"

    # a vector whose three components all equal fx has a divergence equal to
    # the sum of the gradient components computed above
    vec = cla.empty(queue, (3, ) + pencil_shape, dtype)
    for mu in range(3):
        vec[mu] = fx

    div = cla.empty(queue, rank_shape, dtype)
    derivs.divergence(queue, vec, div)
    div_true = sum(grd[i] for i in range(3)).get()

    # filter small values dominated by round-off error
    mask = np.abs(div_true) > 1e-11
    max_err, avg_err = get_errs(div_true[mask], div.get()[mask])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"div inaccurate for {h=}, {grid_shape=}, {proc_shape=}:" \
        f" {max_err=}, {avg_err=}"

    if timing:
        from common import timer

        base_args = dict(queue=queue, fx=fx)
        div_args = dict(queue=queue, vec=vec, div=div)
        if h == 0:
            # the spectral calls are additionally given a pooled allocator
            import pyopencl.tools as clt
            pool = clt.MemoryPool(clt.ImmediateAllocator(queue))
            base_args["allocator"] = pool
            div_args["allocator"] = pool

        times = {}
        times["gradient and laplacian"] = timer(
            lambda: derivs(lap=lap, grd=grd, **base_args))
        times["gradient"] = timer(lambda: derivs(grd=grd, **base_args))
        times["laplacian"] = timer(lambda: derivs(lap=lap, **base_args))
        times["pdx"] = timer(lambda: derivs(pdx=grd[0], **base_args))
        times["pdy"] = timer(lambda: derivs(pdy=grd[1], **base_args))
        times["pdz"] = timer(lambda: derivs(pdz=grd[2], **base_args))
        times["divergence"] = timer(lambda: derivs.divergence(**div_args))

        if mpi.rank == 0:
            print(f"{grid_shape=}, {h=}, {proc_shape=}")
            for key, val in times.items():
                print(f"{key} took {val:.3f} ms")
示例#5
0
# coefficients of the phi * chi**2 and chi**4 terms in potential()
sigma = 0.0
lambda4 = 0.0
# NOTE(review): presumably initial values and initial time derivatives of the
# two fields (phi, chi) -- confirm where f0 and df0 are consumed
f0 = [0.193 * mpl, 0]  # units of mpl
df0 = [-0.142231 * mpl, 0]  # units of mpl
end_time = 1
end_scale_factor = 20
Stepper = ps.LowStorageRK54
gravitational_waves = True  # whether to simulate gravitational waves

ctx = ps.choose_device_and_make_context()
queue = cl.CommandQueue(ctx)

decomp = ps.DomainDecomposition(proc_shape, halo_shape, rank_shape)
fft = ps.DFT(decomp, ctx, queue, grid_shape, dtype)
# spectral derivatives when there are no halo layers, finite differences
# otherwise
derivs = (
    ps.SpectralCollocator(fft, dk)
    if halo_shape == 0
    else ps.FiniteDifferencer(decomp, halo_shape, dx, rank_shape=rank_shape)
)


def potential(f):
    """Return the potential of the two fields in *f*, divided by ``mphi**2``.

    ``f[0]`` is used as ``phi`` and ``f[1]`` as ``chi``. The couplings
    ``mphi``, ``mchi``, ``gsq``, ``sigma`` and ``lambda4`` are read from
    module scope.
    """
    phi = f[0]
    chi = f[1]

    # the terms are accumulated left to right in this fixed order
    mass_terms = mphi**2 / 2 * phi**2 + mchi**2 / 2 * chi**2
    quartic_coupling = gsq / 2 * phi**2 * chi**2
    trilinear = sigma / 2 * phi * chi**2
    self_interaction = lambda4 / 4 * chi**4

    total = mass_terms + quartic_coupling + trilinear + self_interaction
    return total / mphi**2