Пример #1
0
    def __init__(self, energy, Stepper, mpl=1., dtype=np.float64):
        """
        Initialize the scale factor, its time derivative, and the stepper
        which evolves both.

        :arg energy: The initial energy density, handed to
            ``self.adot_friedmann_1`` to obtain the initial ``adot``.

        :arg Stepper: The time-stepper class to instantiate. Its direct base
            classes and its own ``num_copies`` attribute (if any) determine
            the layout of the background arrays.

        :arg mpl: Stored as ``self.mpl``.
            Defaults to ``1``.

        :arg dtype: The datatype of the background arrays and the stepper.
            Defaults to ``np.float64``.
        """
        from pymbolic import var
        from pystella import DisableLogging, Field
        from pystella.step import LowStorageRKStepper
        from loopy.target.c.c_execution import logger as c_logger

        self.mpl = mpl

        # only *direct* subclasses of LowStorageRKStepper are detected here
        self.is_low_storage = LowStorageRKStepper in Stepper.__bases__

        # num_copies is read off the class itself (not inherited), else 1
        copies = Stepper.__dict__.get("num_copies", 1)
        self.a = np.ones((copies,), dtype=dtype)
        self.adot = self.adot_friedmann_1(self.a, energy)
        self.hubble = self.adot / self.a

        # low-storage steppers get length-1 symbolic arrays indexed at 0;
        # all others get shapeless fields with an empty subscript
        if self.is_low_storage:
            sym_shape, sym_index = (1,), (0,)
        else:
            sym_shape, sym_index = (), ()

        a_sym = Field("a", indices=[], shape=sym_shape)[sym_index]
        adot_sym = Field("adot", indices=[], shape=sym_shape)[sym_index]
        addot_sym = self.addot_friedmann_2(a_sym, var("energy"),
                                           var("pressure"))
        equations = {a_sym: adot_sym, adot_sym: addot_sym}

        with DisableLogging(c_logger):  # silence GCCToolchain warning
            self.stepper = Stepper(equations,
                                   rank_shape=(0, 0, 0),
                                   halo_shape=0,
                                   dtype=dtype,
                                   target=lp.ExecutableCTarget())
Пример #2
0
    def make_shift_kernel(self, **kwargs):
        """
        Create the kernel which writes ``scale * f + shift`` into ``tmp``.

        The expression map is stored as ``self.shift_dict`` and the kernel as
        ``self.shifter``.

        :arg kwargs: Forwarded unmodified to :class:`~pystella.ElementWiseMap`.
        """
        from pymbolic import var
        from pystella import ElementWiseMap

        source = Field("f", offset=0)
        target = Field("tmp", offset=0)
        self.shift_dict = {target: var("scale") * source + var("shift")}

        # NOTE(review): the lone Ellipsis presumably asks the kernel to infer
        # its arguments from the expressions -- confirm against ElementWiseMap
        self.shifter = ElementWiseMap(self.shift_dict, args=[...], **kwargs)
Пример #3
0
    def rhs_dict(self):
        """
        Assemble the right-hand sides for each scalar field and its time
        derivative.

        For every field index, ``f`` is mapped to ``f.dot`` and ``f.dot`` is
        mapped to ``lap(f) - 2 * hubble * f.dot - a**2 * dV/df``.

        :returns: A :class:`dict` mapping each evolved quantity to its
            right-hand-side expression.
        """
        fields = self.f
        hubble = Field("hubble", indices=[])
        scale_factor = Field("a", indices=[])
        potential = self.potential(fields)

        equations = {}
        for n in range(self.nscalars):
            phi, phi_dot = fields[n], fields.dot[n]
            friction = 2 * hubble * phi_dot
            force = scale_factor**2 * diff(potential, phi)

            equations[phi] = phi_dot
            equations[phi_dot] = fields.lap[n] - friction - force

        return equations
Пример #4
0
    def __init__(self, decomp, num_bins, dtype, **kwargs):
        """
        Set up linearly and logarithmically binned histograms of a field
        ``f``, plus the reduction which finds the bounds the binning needs.

        :arg decomp: Passed on to the parent constructor and to
            :class:`Reduction`.

        :arg num_bins: The number of bins; computed bin indices are clipped
            to ``[0, num_bins - 1]``.

        :arg dtype: Passed on to the parent constructor.

        :arg halo_shape: Optional keyword; used as the offset of ``f`` and
            passed to :class:`Reduction`.
            Defaults to ``0``.

        Remaining keyword arguments are forwarded to both the parent
        constructor and :class:`Reduction`.
        """
        from pymbolic import parse
        import pymbolic.functions as pf

        halo_shape = kwargs.pop("halo_shape", 0)
        f = Field("f", offset=halo_shape)
        log_abs_f = pf.log(pf.fabs(f))

        max_f, min_f, max_log_f, min_log_f = parse(
            "max_f, min_f, max_log_f, min_log_f")
        lower, upper = parse("min, max")

        def to_bin(fraction):
            # scale a [0, 1] fraction to a bin number and clamp it in range
            return upper(lower(fraction * num_bins, num_bins - 1), 0)

        linear_fraction = (f - min_f) / (max_f - min_f)
        log_fraction = (log_abs_f - min_log_f) / (max_log_f - min_log_f)
        histograms = {
            "linear": (to_bin(linear_fraction), 1),
            "log": (to_bin(log_fraction), 1),
        }

        super().__init__(decomp, histograms, num_bins, dtype, **kwargs)

        reducers = {
            "max_f": [(f, "max")],
            "min_f": [(f, "min")],
            "max_log_f": [(log_abs_f, "max")],
            "min_log_f": [(log_abs_f, "min")],
        }
        self.get_min_max = Reduction(decomp, reducers, halo_shape=halo_shape,
                                     **kwargs)
Пример #5
0
def test_reduction(ctx_factory,
                   grid_shape,
                   proc_shape,
                   dtype,
                   op,
                   _grid_shape,
                   pass_grid_dims,
                   timing=False):
    """
    Check :class:`ps.Reduction` of ``f / 2 + .31`` (computed through a
    temporary instruction) against ``reducer.reduce_array``.

    :arg ctx_factory: A callable returning an OpenCL context, or a falsy
        value to let ``ps.choose_device_and_make_context`` pick one.

    :arg grid_shape: The grid shape, unless overridden by ``_grid_shape``.

    :arg proc_shape: The shape of the process grid.

    :arg dtype: The datatype of the random test array.

    :arg op: The reduction operation; ``"avg"`` results are additionally
        normalized by the total grid size.

    :arg _grid_shape: If truthy, used in place of ``grid_shape``.

    :arg pass_grid_dims: Whether to pass ``rank_shape`` and ``grid_size`` to
        the reducer's constructor.

    :arg timing: Whether to time the reduction and print the result on
        rank 0.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    grid_shape = _grid_shape or grid_shape
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)

    # np.prod, not np.product: the latter alias was removed in NumPy 2.0
    grid_size = np.prod(grid_shape)

    from pymbolic import var
    from pystella import Field
    tmp_insns = [(var("x"), Field("f") / 2 + .31)]

    reducers = {}
    reducers["avg"] = [(var("x"), op)]

    if pass_grid_dims:
        reducer = ps.Reduction(mpi,
                               reducers,
                               rank_shape=rank_shape,
                               tmp_instructions=tmp_insns,
                               grid_size=grid_size)
    else:
        reducer = ps.Reduction(mpi, reducers, tmp_instructions=tmp_insns)

    f = clr.rand(queue, rank_shape, dtype=dtype)

    import pyopencl.tools as clt
    pool = clt.MemoryPool(clt.ImmediateAllocator(queue))

    result = reducer(queue, f=f, allocator=pool)
    avg = result["avg"]

    avg_test = reducer.reduce_array(f / 2 + .31, op)
    if op == "avg":
        avg_test /= grid_size

    rtol = 5e-14 if dtype == np.float64 else 1e-5
    assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
        f"{op} reduction innaccurate for {grid_shape=}, {proc_shape=}"

    if timing:
        from common import timer
        t = timer(lambda: reducer(queue, f=f, allocator=pool), ntime=1000)
        if mpi.rank == 0:
            print(
                f"reduction took {t:.3f} ms for {grid_shape=}, {proc_shape=}")
            # t is in ms, so convert bytes -> GB and ms -> s
            bandwidth = f.nbytes / 1024**3 / t * 1000
            print(f"Bandwidth = {bandwidth:.1f} GB/s")
Пример #6
0
    def make_residual_kernel(self, MapKernel, **kwargs):
        """
        Create ``self.residual``, the kernel which writes ``rho - lhs`` into
        the field ``r_<name>`` for every unknown in ``self.lhs_dict``.

        :arg MapKernel: The kernel class (or factory) to call.

        :arg kwargs: Forwarded unmodified to ``MapKernel``.
        """
        residual_dict = {
            Field("r_" + unknown.child.name, offset="h"): rho - lhs
            for unknown, (lhs, rho) in self.lhs_dict.items()
        }

        kernel_args = self.unknown_args + self.rho_args + self.residual_args
        self.residual = MapKernel(residual_dict, args=kernel_args, **kwargs)
Пример #7
0
    def make_stepper(self, MapKernel, **kwargs):
        """
        Create ``self.stepper``, the kernel which writes the result of
        ``self.step_operator`` into the field ``tmp_<name>`` for every
        unknown in ``self.lhs_dict``.

        The expression map is kept as ``self.step_dict``.

        :arg MapKernel: The kernel class (or factory) to call.

        :arg kwargs: Forwarded unmodified to ``MapKernel``.
        """
        step_dict = self.step_dict = {}
        for unknown, (lhs, rho) in self.lhs_dict.items():
            # the scratch field shares the offset of the unknown it updates
            scratch = Field("tmp_" + unknown.child.name,
                            offset=unknown.offset)
            step_dict[scratch] = self.step_operator(unknown, lhs, rho)

        kernel_args = self.unknown_args + self.rho_args + self.temp_args
        self.stepper = MapKernel(step_dict, args=kernel_args, **kwargs)
Пример #8
0
def test_get_field_args(proc_shape):
    """
    Check that ``get_field_args`` finds the same kernel arguments for several
    differently structured collections of expressions over the same fields.

    :arg proc_shape: The shape of the process grid; the test is skipped
        unless it is ``(1, 1, 1)``.
    """
    if proc_shape != (1, 1, 1):
        pytest.skip("test field only on one rank")

    from pystella import Field, DynamicField, get_field_args

    x = Field("x", offset=(1, 2, 3))
    y = Field("y", offset="h")
    z = DynamicField("z", shape=(2, "a"))

    import loopy as lp
    true_args = [
        lp.GlobalArg("x", shape="(Nx+2, Ny+4, Nz+6)", offset=lp.auto),
        lp.GlobalArg("y", shape="(Nx+2*h, Ny+2*h, Nz+2*h)", offset=lp.auto),
        lp.GlobalArg("z", shape="(2, a, Nx, Ny, Nz)", offset=lp.auto),
        lp.GlobalArg("dzdx", shape="(2, a, 3, Nx, Ny, Nz)", offset=lp.auto),
    ]

    def lists_equal(a, b):
        # order-insensitive comparison: each list must contain the other
        return all(item in b for item in a) and all(item in a for item in b)

    # a dict, a bare expression, and two lists, all referencing x, y, z and
    # the derivative array of z
    cases = [
        {x: y, y: x * z + z.pd[0]},
        x * y + z + z.pd[2],
        [x, y, y * z**2, 3 + z.pd[0] + z.pd[1]],
        [shift_fields(x, (1, 2, 3)), y + z.pd[0], y * z**2],
    ]

    for expressions in cases:
        args = get_field_args(expressions)
        assert lists_equal(args, true_args)
Пример #9
0
def test_collect_field_indices(proc_shape):
    """
    Check that ``collect_field_indices`` returns the expected set of index
    names for several collections of expressions.

    :arg proc_shape: The shape of the process grid; the test is skipped
        unless it is ``(1, 1, 1)``.
    """
    if proc_shape != (1, 1, 1):
        pytest.skip("test field only on one rank")

    from pystella import Field, DynamicField
    from pystella.field import collect_field_indices

    x = Field("x", offset=(1, 2, 3))
    y = Field("y", indices=("i", "x"), offset="h")
    z = DynamicField("z", shape=(2, "a"))

    default_indices = {"i", "j", "k"}
    with_custom_index = {"i", "j", "k", "x"}

    # (expressions, expected index names); only y carries the index "x"
    cases = (
        ({x: y, y: x * z + z.pd[0]}, with_custom_index),
        ([x, z], default_indices),
        ([shift_fields(x, (1, 2, 3)), y + z.pd[0], y * z**2],
         with_custom_index),
    )

    for expressions, expected in cases:
        indices = collect_field_indices(expressions)
        assert indices == expected
Пример #10
0
def test_reduction_with_new_shape(ctx_factory,
                                  grid_shape,
                                  proc_shape,
                                  dtype,
                                  op,
                                  _grid_shape,
                                  timing=False):
    """
    Check that a single :class:`ps.Reduction` instance gives accurate results
    when called first on the original grid and then on a grid with half as
    many points per axis.

    :arg ctx_factory: A callable returning an OpenCL context, or a falsy
        value to let ``ps.choose_device_and_make_context`` pick one.

    :arg grid_shape: The grid shape, unless overridden by ``_grid_shape``.

    :arg proc_shape: The shape of the process grid.

    :arg dtype: The datatype of the random test arrays.

    :arg op: The reduction operation; ``"avg"`` results are additionally
        normalized by the total grid size.

    :arg _grid_shape: If truthy, used in place of ``grid_shape``.

    :arg timing: Unused by this test.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    grid_shape = _grid_shape or grid_shape
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)

    from pystella import Field
    reducers = {}
    reducers["avg"] = [(Field("f"), op)]

    reducer = ps.Reduction(mpi, reducers)

    rtol = 5e-14 if dtype == np.float64 else 1e-5

    def check(grid_shape, what):
        # reduce a fresh random array for this grid and compare to reference
        rank_shape, _ = mpi.get_rank_shape_start(grid_shape)
        f = clr.rand(queue, rank_shape, dtype=dtype)
        avg = reducer(queue, f=f)["avg"]

        avg_test = reducer.reduce_array(f, op)
        if op == "avg":
            # np.prod, not np.product: the alias was removed in NumPy 2.0
            avg_test /= np.prod(grid_shape)

        assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
            f"{op} {what} innaccurate for {grid_shape=}, {proc_shape=}"

    check(grid_shape, "reduction")

    # test call to reducer with new shape
    check(tuple(Ni // 2 for Ni in grid_shape), "reduction w/new shape")
Пример #11
0
    def make_lhs_kernel(self, MapKernel, **kwargs):
        """
        Create ``self.lhs_correction``, the kernel which overwrites each
        ``rho`` with ``r_<name> + lhs``.

        Each ``lhs`` is first evaluated into an entry of the temporary
        ``tmp_lhs``, one entry per unknown in ``self.lhs_dict``.

        :arg MapKernel: The kernel class (or factory) to call.

        :arg kwargs: Forwarded unmodified to ``MapKernel``.
        """
        from pymbolic import var
        scratch = var("tmp_lhs")

        tmp_insns, corrections = {}, {}
        for idx, (unknown, (lhs, rho)) in enumerate(self.lhs_dict.items()):
            slot = scratch[idx]
            tmp_insns[slot] = lhs
            residual = Field("r_" + unknown.child.name, offset="h")
            corrections[rho] = residual + slot

        kernel_args = self.unknown_args + self.rho_args + self.residual_args
        self.lhs_correction = MapKernel(corrections,
                                        tmp_instructions=tmp_insns,
                                        args=kernel_args,
                                        **kwargs)
Пример #12
0
    def make_resid_stats(self, decomp, queue, dtype, **kwargs):
        """
        Create the reductions which summarize the residual fields.

        ``self.resid_stats`` computes, per unknown, the maximum of
        ``fabs(r)`` and the average of ``r**2``; ``self.avg_resid`` computes
        the average of ``r`` itself.

        :arg decomp: Passed on to :class:`~pystella.Reduction`.

        :arg queue: Unused by this method.

        :arg dtype: Unused by this method.

        :arg kwargs: Forwarded unmodified to both reductions.
        """
        from pymbolic import var
        from pystella import Reduction

        fabs = var("fabs")
        residuals = {
            arg.name: Field("r_" + arg.name, offset="h")
            for arg in self.unknown_args
        }

        reducers = {
            name: [(fabs(resid), "max"), (resid**2, "avg")]
            for name, resid in residuals.items()
        }
        avg_reducers = {
            name: [(resid, "avg")] for name, resid in residuals.items()
        }

        args = self.residual_args
        self.resid_stats = Reduction(decomp, reducers, args=args, **kwargs)
        self.avg_resid = Reduction(decomp, avg_reducers, args=args, **kwargs)
Пример #13
0
    def rhs_dict(self):
        """
        Assemble the right-hand sides for the independent components of
        ``hij`` and their time derivatives.

        For each pair ``1 <= i <= j <= 3``, ``hij`` is mapped to ``hij.dot``
        and ``hij.dot`` is mapped to
        ``lap(hij) - 2 * hubble * hij.dot + 16 * pi * S_ij``, where ``S_ij``
        sums ``stress_tensor(i, j, drop_trace=True)`` over ``self.sectors``.

        :returns: A :class:`dict` mapping each evolved quantity to its
            right-hand-side expression.
        """
        tensor = self.hij
        hubble = Field("hubble", indices=[])

        # upper-triangular index pairs, in row-major order
        components = [(i, j) for i in range(1, 4) for j in range(i, 4)]

        equations = {}
        for i, j in components:
            idx = tensor_index(i, j)
            source = sum(
                sector.stress_tensor(i, j, drop_trace=True)
                for sector in self.sectors)
            friction = 2 * hubble * tensor.dot[idx]

            equations[tensor[idx]] = tensor.dot[idx]
            equations[tensor.dot[idx]] = (tensor.lap[idx] - friction
                                          + 16 * np.pi * source)

        return equations
Пример #14
0
    def stress_tensor(self, mu, nu, drop_trace=False):
        """
        Build the symbolic ``(mu, nu)`` component of the stress tensor.

        :arg mu: The first index of the requested component.

        :arg nu: The second index of the requested component.

        :arg drop_trace: If *True*, return only the sum over fields of
            ``f.d(fld, mu) * f.d(fld, nu)``. Otherwise, additionally add the
            covariant metric component times the Lagrangian.
            Defaults to *False*.

        :returns: The symbolic expression for the component.
        """
        f = self.f
        a = Field("a", indices=[])
        field_ids = range(self.nscalars)

        gradient_term = sum(f.d(fld, mu) * f.d(fld, nu) for fld in field_ids)
        if drop_trace:
            return gradient_term

        # contravariant metric
        inverse_metric = np.diag((-1 / a**2, 1 / a**2, 1 / a**2, 1 / a**2))
        # alpha/beta are summed over and are distinct from the mu/nu arguments
        contraction = sum(
            sum(inverse_metric[alpha, beta] * f.d(fld, alpha) * f.d(fld, beta)
                for alpha in range(4) for beta in range(4))
            for fld in field_ids)
        lagrangian = -contraction / 2 - self.potential(self.f)

        # covariant metric
        metric = np.diag((-a**2, a**2, a**2, a**2))
        return gradient_term + metric[mu, nu] * lagrangian
Пример #15
0
    def __init__(self, decomp, halo_shape, **kwargs):
        """
        Configure reductions of ``f`` (``"mean"``) and ``f**2``
        (``"variance"``), optionally with extrema.

        :arg decomp: Passed on to the parent constructor.

        :arg halo_shape: Passed on to the parent constructor.

        :arg max_min: Optional keyword (stored as ``self.min_max``). If
            truthy, also reduce the maximum and minimum of ``f`` and of
            ``fabs(f)``.
            Defaults to *False*.

        Remaining keyword arguments are forwarded to the parent constructor.
        """
        from pystella import Field

        # note the keyword is spelled "max_min" but stored as min_max
        self.min_max = kwargs.pop("max_min", False)

        f = Field("f", offset="h")
        reducers = {"mean": [f], "variance": [f**2]}

        if self.min_max:
            from pymbolic import var
            fabs = var("fabs")
            reducers.update({
                "max": [(f, "max")],
                "min": [(f, "min")],
                "abs_max": [(fabs(f), "max")],
                "abs_min": [(fabs(f), "min")],
            })

        self.reducers = reducers
        super().__init__(decomp, reducers, halo_shape=halo_shape, **kwargs)
Пример #16
0
def InterpolationBase(even_coefs, odd_coefs, StencilKernel, halo_shape,
                      **kwargs):
    """
    A base function for generating an interpolation kernel.

    The kernel writes to ``f1`` (shape ``(Nx+2*h, Ny+2*h, Nz+2*h)``) using
    values of ``f2`` (shape ``(Nx//2+2*h, Ny//2+2*h, Nz//2+2*h)``).

    :arg even_coefs: The coefficients representing the interpolation formula
        for gridpoints on the coarse and fine grid which coincide in space.
        Follows the convention of :func:`pystella.derivs.centered_diff`
        (since the interpolation is applied recursively in each dimension).

    :arg odd_coefs: Same as ``even_coefs``, but for points on the fine grid
        which lie between points on the coarse grid.

    :arg StencilKernel: The stencil mapper to create an instance of.

    :arg halo_shape: The number of halo layers on (both sides of) each axis of
        the computational grid.
        Currently must be an :class:`int`.

    :arg correct: A :class:`bool` determining whether to produce a kernel which
        corrects an output array by the interpolated array, or to only perform
        strict interpolation.
        Defaults to *False*.

    Remaining keyword arguments are forwarded to ``StencilKernel``.

    :returns: An instance of ``StencilKernel`` which executes the requested
        interpolation.
    """

    import itertools
    from pymbolic import parse, var
    from pymbolic.primitives import Remainder

    i, j, k = parse("i, j, k")
    f1 = Field("f1", offset="h")
    tmp = var("tmp")

    def stencil_for(bit):
        # odd parity along an axis selects the in-between-points formula
        return odd_coefs.items() if bit else even_coefs.items()

    # one temporary per (i % 2, j % 2, k % 2) combination
    tmp_insns = {}
    for parity in itertools.product((0, 1), repeat=3):
        stencils = [stencil_for(bit) for bit in parity]

        result = 0
        for (da, w_a), (db, w_b), (dc, w_c) in itertools.product(*stencils):
            f2 = Field("f2",
                       offset="h",
                       indices=((i + da) // 2, (j + db) // 2, (k + dc) // 2))
            result += w_a * w_b * w_c * f2

        tmp_insns[tmp[parity]] = result

    # select the temporary matching the parity of the current gridpoint
    interpolated = tmp[tuple(Remainder(ind, 2) for ind in (i, j, k))]

    if kwargs.pop("correct", False):
        interp_dict = {f1: f1 + interpolated}
    else:
        interp_dict = {f1: interpolated}

    args = [
        lp.GlobalArg("f1", shape="(Nx+2*h, Ny+2*h, Nz+2*h)"),
        lp.GlobalArg("f2", shape="(Nx//2+2*h, Ny//2+2*h, Nz//2+2*h)")
    ]

    return StencilKernel(interp_dict,
                         tmp_instructions=tmp_insns,
                         args=args,
                         prefetch_args=["f2"],
                         halo_shape=halo_shape,
                         **kwargs)
Пример #17
0
def test_relax(ctx_factory,
               grid_shape,
               proc_shape,
               h,
               dtype,
               Solver,
               timing=False):
    """
    Check that a relaxation ``Solver`` converges at least linearly and
    smooths the residual for two test problems: ``lap(f) = rho`` and
    ``lap(f2) - f2 = rho2``.

    :arg ctx_factory: A callable returning an OpenCL context, or a falsy
        value to let ``ps.choose_device_and_make_context`` pick one.

    :arg grid_shape: The grid shape; the test is skipped if any axis has
        fewer than 128 points.

    :arg proc_shape: The shape of the process grid.

    :arg h: The number of halo layers; also selects the Laplacian stencil.

    :arg dtype: The datatype of the test arrays.

    :arg Solver: The relaxation solver class under test.

    :arg timing: Unused by this test.
    """
    if min(grid_shape) < 128:
        pytest.skip("test_relax needs larger grids, for now")

    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    L = 10
    dx = L / grid_shape[0]
    dk = 2 * np.pi / L

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)
    spectra = ps.PowerSpectra(mpi, fft, (dk, ) * 3, L**3)
    # np.prod, not np.product: the latter alias was removed in NumPy 2.0
    statistics = ps.FieldStatistics(mpi,
                                    h,
                                    rank_shape=rank_shape,
                                    grid_size=np.prod(grid_shape))

    def get_laplacian(f):
        from pystella.derivs import _lap_coefs, centered_diff
        lap_coefs = _lap_coefs[h]
        from pymbolic import var
        return sum([
            centered_diff(f, lap_coefs, direction=mu, order=2)
            for mu in range(1, 4)
        ]) / var("dx")**2

    test_problems = {}

    from pystella import Field
    f = Field("f", offset="h")
    rho = Field("rho", offset="h")
    test_problems[f] = (get_laplacian(f), rho)

    f = Field("f2", offset="h")
    rho = Field("rho2", offset="h")
    test_problems[f] = (get_laplacian(f) - f, rho)

    solver = Solver(mpi,
                    queue,
                    test_problems,
                    halo_shape=h,
                    dtype=dtype,
                    fixed_parameters=dict(omega=1 / 2))

    def zero_mean_array():
        # scatter a random global array to the ranks, then subtract its mean
        f0 = clr.rand(queue, grid_shape, dtype)
        f = clr.rand(queue, tuple(ni + 2 * h for ni in rank_shape), dtype)
        mpi.scatter_array(queue, f0, f, root=0)
        avg = statistics(f)["mean"]
        f = f - avg
        mpi.share_halos(queue, f)
        return f

    f = zero_mean_array()
    rho = zero_mean_array()
    tmp = cla.zeros_like(f)

    f2 = zero_mean_array()
    rho2 = zero_mean_array()
    tmp2 = cla.zeros_like(f2)

    num_iterations = 1000
    errors = {"f": [], "f2": []}
    first_mode_zeroed = {"f": [], "f2": []}
    # relax two iterations at a time, recording errors after each pair
    for _ in range(0, num_iterations, 2):
        solver(mpi,
               queue,
               iterations=2,
               dx=np.array(dx),
               f=f,
               tmp_f=tmp,
               rho=rho,
               f2=f2,
               tmp_f2=tmp2,
               rho2=rho2)

        # the scratch arrays are reused to hold the residuals r_f and r_f2
        err = solver.get_error(queue,
                               f=f,
                               r_f=tmp,
                               rho=rho,
                               f2=f2,
                               r_f2=tmp2,
                               rho2=rho2,
                               dx=np.array(dx))
        for k, v in err.items():
            errors[k].append(v)

        for key, resid in zip(["f", "f2"], [tmp, tmp2]):
            spectrum = spectra(resid, k_power=0)
            if mpi.rank == 0:
                # index (past the first bin) of the first bin whose power is
                # negligible relative to the peak
                max_amp = np.max(spectrum)
                first_zero = np.argmax(spectrum[1:] < 1e-30 * max_amp)
                first_mode_zeroed[key].append(first_zero)

    for k, errs in errors.items():
        errs = np.array(errs)
        iters = np.arange(1, errs.shape[0] + 1)
        assert (errs[10:, 0] * iters[10:] / errs[0, 0] < 1.).all(), \
            "relaxation not converging at least linearly for " \
            f"{grid_shape=}, {h=}, {proc_shape=}"

    # only rank 0 filled first_mode_zeroed, so share it before asserting
    first_mode_zeroed = mpi.bcast(first_mode_zeroed, root=0)
    for k, x in first_mode_zeroed.items():
        x = np.array(list(x))[2:]
        assert (x[1:] <= x[:-1]).all() and np.min(x) < np.max(x) / 5, \
            f"relaxation not smoothing error {grid_shape=}, {h=}, {proc_shape=}"
Пример #18
0
    def __init__(self, fft, effective_k, dk, dx):
        """
        Precompute effective momenta and build the kernels which project and
        decompose vectors and tensors in Fourier space.

        :arg fft: An FFT object. This method reads its ``sub_k`` (a mapping
            including the key ``"momenta_x"``), ``rdtype``, ``cdtype``, and
            ``grid_shape`` attributes and calls ``fft.shape(True)``.

        :arg effective_k: Either a callable ``(k, dx)`` returning the
            effective momentum, or a non-callable value. A nonzero
            non-callable value is passed to
            ``pystella.derivs.FirstCenteredDifference`` and that object's
            ``get_eigenvalues`` is used; ``0`` selects ``k`` itself.

        :arg dk: The momentum-space spacing, indexed per axis.

        :arg dx: The position-space spacing, indexed per axis.

        The kernels are stored as ``transversify_knl``, ``vec_to_pol_knl``,
        ``pol_to_vec_knl``, ``vec_decomp_knl``,
        ``vec_decomp_knl_times_abs_k``, ``tt_knl``, ``tensor_to_pol_knl``,
        and ``pol_to_tensor_knl``; the device arrays of effective momenta are
        stored in the :class:`dict` ``eff_mom``.
        """
        self.fft = fft

        # normalize effective_k into a callable of (k, dx)
        if not callable(effective_k):
            if effective_k != 0:
                from pystella.derivs import FirstCenteredDifference
                h = effective_k
                effective_k = FirstCenteredDifference(h).get_eigenvalues
            else:

                def effective_k(k, dx):  # pylint: disable=function-redefined
                    return k

        # reuse the queue attached to the fft's momenta arrays
        queue = self.fft.sub_k["momenta_x"].queue
        # host copies of the integer mode numbers along each axis
        sub_k = list(x.get().astype("int") for x in self.fft.sub_k.values())
        eff_mom_names = ("eff_mom_x", "eff_mom_y", "eff_mom_z")
        self.eff_mom = {}
        for mu, (name, kk) in enumerate(zip(eff_mom_names, sub_k)):
            eff_k = effective_k(dk[mu] * kk.astype(fft.rdtype), dx[mu])
            # zero the modes at |k| == N/2 (Nyquist) and at k == 0
            eff_k[abs(sub_k[mu]) == fft.grid_shape[mu] // 2] = 0.
            eff_k[sub_k[mu] == 0] = 0.

            import pyopencl.array as cla
            self.eff_mom[name] = cla.to_device(queue, eff_k)

        from pymbolic import var, parse
        from pymbolic.primitives import If, Comparison, LogicalAnd
        from pystella import Field
        indices = parse("i, j, k")
        # symbolic lookups eff_mom_x[i], eff_mom_y[j], eff_mom_z[k]
        eff_k = tuple(
            var(array)[mu] for array, mu in zip(eff_mom_names, indices))
        fabs, sqrt, conj = parse("fabs, sqrt, conj")
        kmag = sqrt(sum(kk**2 for kk in eff_k))

        from pystella import ElementWiseMap
        vector = Field("vector", shape=(3, ))
        vector_T = Field("vector_T", shape=(3, ))

        # true where all three effective momentum components are below 1e-14
        # in magnitude
        kvec_zero = LogicalAnd(
            tuple(Comparison(fabs(eff_k[mu]), "<", 1e-14) for mu in range(3)))

        # note: write all output via private temporaries to allow for in-place

        # div = k . vector
        div = var("div")
        div_insn = [(div, sum(eff_k[mu] * vector[mu] for mu in range(3)))]
        # vector_T = vector - k (k . vector) / |k|^2, or 0 where kvec_zero
        self.transversify_knl = ElementWiseMap(
            {
                vector_T[mu]: If(kvec_zero, 0,
                                 vector[mu] - eff_k[mu] / kmag**2 * div)
                for mu in range(3)
            },
            tmp_instructions=div_insn,
            lsize=(32, 1, 1),
            rank_shape=fft.shape(True),
        )

        import loopy as lp

        # wrap lp.Assignment with default within_inames and no_sync_with
        # NOTE(review): no_sync_with=[("*", "any")] presumably disables
        # synchronization checks against all other instructions -- confirm
        # against the loopy documentation
        def assign(asignee, expr, **kwargs):
            default = dict(within_inames=frozenset(("i", "j", "k")),
                           no_sync_with=[("*", "any")])
            default.update(kwargs)
            return lp.Assignment(asignee, expr, **default)

        # kmag = |k|; Kappa = sqrt(kx^2 + ky^2)
        kmag, Kappa = parse("kmag, Kappa")
        eps_insns = [
            assign(kmag, sqrt(sum(kk**2 for kk in eff_k))),
            assign(Kappa, sqrt(sum(kk**2 for kk in eff_k[:2])))
        ]

        zero = fft.cdtype.type(0)
        kx_ky_zero = LogicalAnd(
            tuple(Comparison(fabs(eff_k[mu]), "<", 1e-10) for mu in range(2)))
        kz_nonzero = Comparison(fabs(eff_k[2]), ">", 1e-10)

        # the three components of eps; where kx and ky both vanish the
        # general formula would divide by Kappa == 0, so fixed values are
        # substituted (depending on whether kz is nonzero)
        eps = var("eps")
        eps_insns.extend([
            assign(
                eps[0],
                If(kx_ky_zero, If(kz_nonzero, fft.cdtype.type(1 / 2**.5),
                                  zero),
                   (eff_k[0] * eff_k[2] / kmag - 1j * eff_k[1]) / Kappa /
                   2**.5)),
            assign(
                eps[1],
                If(kx_ky_zero,
                   If(kz_nonzero, fft.cdtype.type(1j / 2**(1 / 2)),
                      zero), (eff_k[1] * eff_k[2] / kmag + 1j * eff_k[0]) /
                   Kappa / 2**.5)),
            assign(eps[2], If(kx_ky_zero, zero, -Kappa / kmag / 2**.5))
        ])

        plus, minus, lng = Field("plus"), Field("minus"), Field("lng")

        # plus_tmp = vector . conj(eps); minus_tmp = vector . eps
        plus_tmp, minus_tmp = parse("plus_tmp, minus_tmp")
        pol_isns = [(plus_tmp,
                     sum(vector[mu] * conj(eps[mu]) for mu in range(3))),
                    (minus_tmp, sum(vector[mu] * eps[mu] for mu in range(3)))]

        # declare the temporaries used by eps_insns
        # NOTE(review): the trailing Ellipsis presumably asks the kernel to
        # infer the remaining arguments -- confirm against ElementWiseMap
        args = [
            lp.TemporaryVariable("kmag"),
            lp.TemporaryVariable("Kappa"),
            lp.TemporaryVariable("eps", shape=(3, )), ...
        ]

        # vector -> (plus, minus)
        self.vec_to_pol_knl = ElementWiseMap(
            {
                plus: plus_tmp,
                minus: minus_tmp
            },
            tmp_instructions=eps_insns + pol_isns,
            args=args,
            lsize=(32, 1, 1),
            rank_shape=fft.shape(True),
        )

        # (plus, minus) -> vector = plus * eps + minus * conj(eps)
        vector_tmp = var("vector_tmp")
        vec_insns = [(vector_tmp[mu], plus * eps[mu] + minus * conj(eps[mu]))
                     for mu in range(3)]

        self.pol_to_vec_knl = ElementWiseMap(
            {vector[mu]: vector_tmp[mu]
             for mu in range(3)},
            tmp_instructions=eps_insns + vec_insns,
            args=args,
            lsize=(32, 1, 1),
            rank_shape=fft.shape(True),
        )

        # vector -> (plus, minus, lng), with lng = -1j * div / |k|^2
        ksq = sum(kk**2 for kk in eff_k)
        lng_rhs = If(kvec_zero, 0, -div / ksq * 1j)
        self.vec_decomp_knl = ElementWiseMap(
            {
                plus: plus_tmp,
                minus: minus_tmp,
                lng: lng_rhs
            },
            tmp_instructions=eps_insns + pol_isns + div_insn,
            args=args,
            lsize=(32, 1, 1),
            rank_shape=fft.shape(True),
        )
        # same decomposition, but with lng = -1j * div / |k|
        lng_rhs = If(kvec_zero, 0, -div / ksq**.5 * 1j)
        self.vec_decomp_knl_times_abs_k = ElementWiseMap(
            {
                plus: plus_tmp,
                minus: minus_tmp,
                lng: lng_rhs
            },
            tmp_instructions=eps_insns + pol_isns + div_insn,
            args=args,
            lsize=(32, 1, 1),
            rank_shape=fft.shape(True),
        )

        from pystella.sectors import tensor_index as tid

        # unit vector along the effective momentum
        eff_k_hat = tuple(kk / sqrt(sum(kk**2 for kk in eff_k))
                          for kk in eff_k)
        hij = Field("hij", shape=(6, ))
        hij_TT = Field("hij_TT", shape=(6, ))

        # P_ab = delta_ab - khat_a khat_b, for 1 <= a <= b <= 3
        Pab = var("P")
        Pab_insns = [(Pab[tid(a, b)], (If(Comparison(a, "==", b), 1, 0) -
                                       eff_k_hat[a - 1] * eff_k_hat[b - 1]))
                     for a in range(1, 4) for b in range(a, 4)]

        # hij_TT_ab = sum_{c, d} (P_ac P_db - P_ab P_cd / 2) hij_cd
        hij_TT_tmp = var("hij_TT_tmp")
        TT_insns = [(hij_TT_tmp[tid(a, b)],
                     sum((Pab[tid(a, c)] * Pab[tid(d, b)] -
                          Pab[tid(a, b)] * Pab[tid(c, d)] / 2) * hij[tid(c, d)]
                         for c in range(1, 4) for d in range(1, 4)))
                    for a in range(1, 4) for b in range(a, 4)]
        # note: where conditionals (branch divergence) go can matter:
        # this kernel is twice as fast when putting the branching in the global
        # write, rather than when setting hij_TT_tmp
        write_insns = [(hij_TT[tid(a,
                                   b)], If(kvec_zero, 0, hij_TT_tmp[tid(a,
                                                                        b)]))
                       for a in range(1, 4) for b in range(a, 4)]
        self.tt_knl = ElementWiseMap(
            write_insns,
            tmp_instructions=Pab_insns + TT_insns,
            lsize=(32, 1, 1),
            rank_shape=fft.shape(True),
        )

        # tensor -> (plus, minus) by contracting hij with eps twice
        # (conjugated for plus, unconjugated for minus)
        tensor_to_pol_insns = {
            plus:
            sum(hij[tid(c, d)] * conj(eps[c - 1]) * conj(eps[d - 1])
                for c in range(1, 4) for d in range(1, 4)),
            minus:
            sum(hij[tid(c, d)] * eps[c - 1] * eps[d - 1] for c in range(1, 4)
                for d in range(1, 4))
        }
        self.tensor_to_pol_knl = ElementWiseMap(
            tensor_to_pol_insns,
            tmp_instructions=eps_insns,
            args=args,
            lsize=(32, 1, 1),
            rank_shape=fft.shape(True),
        )

        # (plus, minus) -> the six components hij_ab with a <= b
        pol_to_tensor_insns = {
            hij[tid(a, b)]: (plus * eps[a - 1] * eps[b - 1] +
                             minus * conj(eps[a - 1]) * conj(eps[b - 1]))
            for a in range(1, 4) for b in range(a, 4)
        }
        self.pol_to_tensor_knl = ElementWiseMap(
            pol_to_tensor_insns,
            tmp_instructions=eps_insns,
            args=args,
            lsize=(32, 1, 1),
            rank_shape=fft.shape(True),
        )
Пример #19
0
def RestrictionBase(coefs, StencilKernel, halo_shape, **kwargs):
    """
    A base function for generating a restriction kernel.

    The kernel writes to ``f2`` (shape ``(Nx+2*h, Ny+2*h, Nz+2*h)``) using
    values of ``f1`` (shape ``(2*Nx+2*h, 2*Ny+2*h, 2*Nz+2*h)``).

    :arg coefs: The coefficients representing the restriction formula.
        Follows the convention of :func:`pystella.derivs.centered_diff`
        (since the restriction is applied recursively in each dimension).

    :arg StencilKernel: The stencil mapper to create an instance of.
        If it is a subclass of ``Stencil``, ``f1`` is requested to be
        prefetched (unless the caller supplies its own ``prefetch_args``).

    :arg halo_shape: The number of halo layers on (both sides of) each axis of
        the computational grid.
        Currently must be an :class:`int`.

    :arg lsize: The shape of prefetched arrays in shared memory.
        See :class:`~pystella.ElementWiseMap`.
        Defaults to ``(4, 4, 4)``.

    :arg correct: A :class:`bool` determining whether to produce a kernel which
        corrects an output array by the restricted array, or to only perform
        strict restriction.
        Defaults to *False*.

    Any ``Nx``, ``Ny``, or ``Nz`` keyword arguments are discarded; remaining
    keyword arguments are forwarded to ``StencilKernel``.

    :returns: An instance of ``StencilKernel`` which executes the requested
        restriction.
    """

    lsize = kwargs.pop("lsize", (4, 4, 4))

    # ensure grid dimensions are *not* passed, as they will be misinterpreted
    for N in ("Nx", "Ny", "Nz"):
        kwargs.pop(N, None)

    # the 3-D stencil is the outer product of the 1-D stencil with itself
    restrict_coefs = {
        (a, b, c): c_a * c_b * c_c
        for a, c_a in coefs.items()
        for b, c_b in coefs.items()
        for c, c_c in coefs.items()
    }

    from pymbolic import parse, var
    i, j, k = parse("i, j, k")
    f1 = Field("f1", offset="h", indices=(2 * i, 2 * j, 2 * k))
    f2 = Field("f2", offset="h")
    tmp = var("tmp")

    tmp_dict = {tmp: expand_stencil(f1, restrict_coefs)}

    if kwargs.pop("correct", False):
        restrict_dict = {f2: f2 - tmp}
    else:
        restrict_dict = {f2: tmp}

    args = [
        lp.GlobalArg("f1", shape="(2*Nx+2*h, 2*Ny+2*h, 2*Nz+2*h)"),
        lp.GlobalArg("f2", shape="(Nx+2*h, Ny+2*h, Nz+2*h)")
    ]

    # StencilKernel is a class, so test it with issubclass: isinstance() on
    # the class itself is always False, which left the prefetching unused.
    # The type guard keeps non-class callables on the no-prefetch path.
    if isinstance(StencilKernel, type) and issubclass(StencilKernel, Stencil):
        kwargs.setdefault("prefetch_args", ["f1"])

    return StencilKernel(restrict_dict,
                         tmp_instructions=tmp_dict,
                         args=args,
                         halo_shape=halo_shape,
                         lsize=lsize,
                         **kwargs)
Пример #20
0
def test_multigrid(ctx_factory, grid_shape, proc_shape, h, dtype, Solver, MG,
                   timing=False):
    """
    Check that repeated multigrid cycles solve two test problems,
    ``lap(f) = rho`` and ``lap(f2) - f2 = rho2``, to the expected tolerance.

    :arg ctx_factory: A callable returning an OpenCL context, or a falsy
        value to let ``ps.choose_device_and_make_context`` pick one.

    :arg grid_shape: The grid shape.

    :arg proc_shape: The shape of the process grid.

    :arg h: The number of halo layers; also selects the Laplacian stencil.

    :arg dtype: The datatype of the test arrays.

    :arg Solver: The relaxation solver class handed to ``MG``.

    :arg MG: The multigrid class under test. The number of cycles and the
        tolerance depend on whether it equals ``MultiGridSolver``.

    :arg timing: Unused by this test.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    L = 10
    dx = L / grid_shape[0]

    # np.prod, not np.product: the latter alias was removed in NumPy 2.0
    statistics = ps.FieldStatistics(mpi, h, rank_shape=rank_shape,
                                    grid_size=np.prod(grid_shape))

    def get_laplacian(f):
        from pystella.derivs import _lap_coefs, centered_diff
        lap_coefs = _lap_coefs[h]
        from pymbolic import var
        return sum([centered_diff(f, lap_coefs, direction=mu, order=2)
                    for mu in range(1, 4)]) / var("dx")**2

    test_problems = {}

    from pystella import Field
    f = Field("f", offset="h")
    rho = Field("rho", offset="h")
    test_problems[f] = (get_laplacian(f), rho)

    f = Field("f2", offset="h")
    rho = Field("rho2", offset="h")
    test_problems[f] = (get_laplacian(f) - f, rho)

    solver = Solver(mpi, queue, test_problems, halo_shape=h, dtype=dtype,
                    fixed_parameters=dict(omega=1/2))
    mg = MG(solver=solver, halo_shape=h, dtype=dtype)

    def zero_mean_array():
        # scatter a random global array to the ranks, then subtract its mean
        f0 = clr.rand(queue, grid_shape, dtype)
        f = clr.rand(queue, tuple(ni + 2*h for ni in rank_shape), dtype)
        mpi.scatter_array(queue, f0, f, root=0)
        avg = statistics(f)["mean"]
        f = f - avg
        mpi.share_halos(queue, f)
        return f

    f = zero_mean_array()
    rho = zero_mean_array()

    f2 = zero_mean_array()
    rho2 = zero_mean_array()

    poisson_errs = []
    helmholtz_errs = []
    num_v_cycles = 15 if MG == MultiGridSolver else 10
    for _ in range(num_v_cycles):
        errs = mg(mpi, queue, dx0=dx, f=f, rho=rho, f2=f2, rho2=rho2)
        # keep only the last error entry reported by each cycle
        poisson_errs.append(errs[-1][-1]["f"])
        helmholtz_errs.append(errs[-1][-1]["f2"])

    tol = 1e-6 if MG == MultiGridSolver else 1e-15
    for name, cycle_errs in zip(["poisson", "helmholtz"],
                                [poisson_errs, helmholtz_errs]):
        # both halves of the message must be f-strings for {name} to be
        # interpolated
        assert cycle_errs[-1][1] < tol and cycle_errs[-2][1] < 10*tol, \
            f"multigrid solution to {name} eqn is inaccurate for " \
            f"{grid_shape=}, {h=}, {proc_shape=}"