Пример #1
0
 def build_traversal(self):
     """Create the FMM traversal builder for this object's OpenCL context.

     The builder receives ``self.cl_context`` together with the stored
     ``_well_sep_is_n_away`` and ``_from_sep_smaller_crit`` settings.

     :returns: a ``boxtree.traversal.FMMTraversalBuilder`` instance.
     """
     from boxtree.traversal import FMMTraversalBuilder as _Builder

     context = self.cl_context
     options = {
         "well_sep_is_n_away": self._well_sep_is_n_away,
         "from_sep_smaller_crit": self._from_sep_smaller_crit,
     }
     return _Builder(context, **options)
Пример #2
0
    def __init__(self,
                 ctx_getter=cl.create_some_context,
                 enable_extents=False):
        """Build a nine-million-particle 2D tree and traversal, kept on the host.

        After construction ``self.tree`` / ``self.trav`` hold the host copies,
        ``self.input`` is ``[tree, weights, weights_sum, trav]`` and
        ``self.pot`` is ``None``.

        :arg ctx_getter: zero-argument callable returning a PyOpenCL context.
        :arg enable_extents: if true, random target radii are generated; note
            that they are not handed to the tree builder here.
        """
        context = ctx_getter()
        cq = cl.CommandQueue(context)

        from pyopencl.characterize import has_struct_arg_count_bug
        if has_struct_arg_count_bug(cq.device):
            pytest.xfail(
                "won't work on devices with the struct arg count issue")

        logging.basicConfig(level=logging.INFO)

        ndim = 2
        n_src = 9000000
        n_tgt = 9000000
        real_dtype = np.float32

        from boxtree.fmm import drive_fmm  # noqa: F401
        src_particles = p_normal(cq, n_src, ndim, real_dtype, seed=15)
        # Not consumed below: the tree is built from the sources only.
        tgt_particles = p_normal(cq, n_tgt, ndim, real_dtype, seed=15)  # noqa: F841

        from pyopencl.clrandom import PhiloxGenerator
        generator = PhiloxGenerator(cq.context, seed=12)

        # Radii are 2 raised to a uniform draw made with a=-10, b=0; like the
        # targets they are not passed to the tree builder.
        radii = None  # noqa: F841
        if enable_extents:
            radii = 2**generator.uniform(  # noqa: F841
                cq, n_tgt, dtype=real_dtype, a=-10, b=0)

        from boxtree import TreeBuilder
        build_tree = TreeBuilder(context)
        dev_tree, _ = build_tree(
            cq, src_particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        build_trav = FMMTraversalBuilder(context)
        dev_trav, _ = build_trav(cq, dev_tree, debug=True)

        unit_weights = np.ones(n_src)
        total_weight = np.sum(unit_weights)

        # Transfer the traversal (and the tree it carries) to the host.
        trav_on_host = dev_trav.get(queue=cq)
        tree_on_host = trav_on_host.tree

        self.tree = tree_on_host
        self.trav = trav_on_host
        self.input = [tree_on_host, unit_weights, total_weight, trav_on_host]
        self.pot = None
Пример #3
0
def test_plot_traversal(ctx_factory, well_sep_is_n_away=1, plot=False):
    """Draw a 2D particle tree plus the interaction lists of one fixed box.

    :arg ctx_factory: zero-argument callable returning a PyOpenCL context.
    :arg well_sep_is_n_away: forwarded to ``FMMTraversalBuilder``; box lists
        are only drawn for the values 1 and 2.
    :arg plot: if true, strip the axis ticks and show the figure.
    """
    pytest.importorskip("matplotlib")
    context = ctx_factory()
    cq = cl.CommandQueue(context)

    # Only the two-dimensional case is exercised.
    for dims in (2,):
        npoints = 10**4
        real_dtype = np.float64

        from pyopencl.clrandom import PhiloxGenerator
        generator = PhiloxGenerator(cq.context, seed=15)

        from pytools.obj_array import make_obj_array
        coordinates = [
            generator.normal(cq, npoints, dtype=real_dtype)
            for _ in range(dims)]
        particles = make_obj_array(coordinates)

        from boxtree import TreeBuilder
        build_tree = TreeBuilder(context)

        cq.finish()
        dev_tree, _ = build_tree(
            cq, particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        build_trav = FMMTraversalBuilder(
            context, well_sep_is_n_away=well_sep_is_n_away)
        dev_trav, _ = build_trav(cq, dev_tree)

        # Host copies are what the plotting helpers receive.
        host_tree = dev_tree.get(queue=cq)
        host_trav = dev_trav.get(queue=cq)

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(host_tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        plotter.set_bounding_box()

        import random
        random.seed(7)

        from boxtree.visualization import draw_box_lists

        # Number of the box whose lists get drawn, per supported setting.
        if well_sep_is_n_away == 1:
            shown_box = 380
        elif well_sep_is_n_away == 2:
            shown_box = 320
        else:
            shown_box = None

        if shown_box is not None:
            draw_box_lists(plotter, host_trav, shown_box)

        if plot:
            import matplotlib.pyplot as pt
            pt.gca().set_xticks([])
            pt.gca().set_yticks([])

            pt.show()
Пример #4
0
def test_interaction_list_particle_count_thresholding(ctx_getter,
                                                      enable_extents):
    """Run the constant-one FMM with a cumulative source-count threshold.

    The traversal is built with ``_from_sep_smaller_min_nsources_cumul`` set
    to one more than the box capacity, and the test asserts that every
    computed potential equals the sum of the (all-one) source weights.

    :arg ctx_getter: zero-argument callable returning a PyOpenCL context.
    :arg enable_extents: if true, random target radii are generated and
        passed to the tree builder.
    """
    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    logging.basicConfig(level=logging.INFO)

    dims = 2
    nsources = 1000
    ntargets = 1000
    # ``np.float`` was merely an alias of the builtin ``float`` (double
    # precision) and has been removed from NumPy; name the dtype explicitly.
    dtype = np.float64

    max_particles_in_box = 30
    # Ensure that we have underfilled boxes.
    from_sep_smaller_min_nsources_cumul = 1 + max_particles_in_box

    from boxtree.fmm import drive_fmm
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)

    if enable_extents:
        # Radii are 2 raised to a uniform draw made with a=-10, b=0.
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=max_particles_in_box,
                 target_radii=target_radii,
                 debug=True,
                 stick_out_factor=0.25)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(
        queue,
        tree,
        debug=True,
        _from_sep_smaller_min_nsources_cumul=from_sep_smaller_min_nsources_cumul
    )

    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    # The wrangler and driver below operate on the host copies.
    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    wrangler = ConstantOneExpansionWrangler(host_tree)

    pot = drive_fmm(host_trav, wrangler, weights)

    # With unit weights every target must see the total source weight.
    assert (pot == weights_sum).all()
Пример #5
0
def test_fmm_float32(ctx_getter=cl.create_some_context, enable_extents=True):
    """Time the constant-one FMM on three million single-precision particles.

    Prints the wall-clock duration of the ``drive_fmm`` call and asserts that
    every potential equals the sum of the unit source weights.

    :arg ctx_getter: zero-argument callable returning a PyOpenCL context.
    :arg enable_extents: if true, random target radii are generated and
        passed to the tree builder.
    """
    from time import time

    context = ctx_getter()
    cq = cl.CommandQueue(context)

    from pyopencl.characterize import has_struct_arg_count_bug
    if has_struct_arg_count_bug(cq.device):
        pytest.xfail("won't work on devices with the struct arg count issue")

    logging.basicConfig(level=logging.INFO)

    ndim = 2
    n_src = 3000000
    n_tgt = 3000000
    real_dtype = np.float32

    from boxtree.fmm import drive_fmm
    src_particles = p_normal(cq, n_src, ndim, real_dtype, seed=15)
    tgt_particles = p_normal(cq, n_tgt, ndim, real_dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(cq.context, seed=12)

    # Radii are 2 raised to a uniform draw made with a=-10, b=0.
    radii = None
    if enable_extents:
        radii = 2**generator.uniform(
            cq, n_tgt, dtype=real_dtype, a=-10, b=0)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    dev_tree, _ = build_tree(
        cq,
        src_particles,
        targets=tgt_particles,
        max_particles_in_box=30,
        target_radii=radii,
        stick_out_factor=0.25,
        debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_trav = FMMTraversalBuilder(context)
    dev_trav, _ = build_trav(cq, dev_tree, debug=True)

    unit_weights = np.ones(n_src)
    expected_total = np.sum(unit_weights)

    # The wrangler and driver operate on the host copies.
    trav_on_host = dev_trav.get(queue=cq)
    wrangler = ConstantOneExpansionWrangler(trav_on_host.tree)

    started = time()
    potentials = drive_fmm(trav_on_host, wrangler, unit_weights)
    print(time() - started)

    assert (potentials == expected_total).all()
Пример #6
0
def test_sumpy_fmm_timing_data_collection(ctx_factory):
    """Check that ``drive_fmm`` fills in the ``timing_data`` dict it is given.

    A 2D Laplace FMM of order 1 is run on 500 sources using a queue created
    with profiling enabled; the test passes when the dict is non-empty.

    :arg ctx_factory: zero-argument callable returning a PyOpenCL context.
    """
    logging.basicConfig(level=logging.INFO)

    context = ctx_factory()
    profiling = cl.command_queue_properties.PROFILING_ENABLE
    cq = cl.CommandQueue(context, properties=profiling)

    n_src = 500
    real_dtype = np.float64

    from boxtree.tools import make_normal_particle_array as p_normal

    kernel = LaplaceKernel(2)
    local_cls = VolumeTaylorLocalExpansion
    mpole_cls = VolumeTaylorMultipoleExpansion
    order = 1

    src_particles = p_normal(cq, n_src, kernel.dim, real_dtype, seed=15)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    tree, _ = build_tree(
        cq, src_particles, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_trav = FMMTraversalBuilder(context)
    trav, _ = build_trav(cq, tree, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(context)
    charges = generator.uniform(cq, n_src, dtype=np.float64)

    target_kernels = [kernel]

    from functools import partial

    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    code_container = SumpyExpansionWranglerCodeContainer(
        context,
        partial(mpole_cls, kernel),
        partial(local_cls, kernel),
        target_kernels)

    def constant_order(kernel, kernel_args, tree, lev):
        # Same expansion order on every level.
        return order

    wrangler = code_container.get_wrangler(
        cq, tree, real_dtype, fmm_level_to_order=constant_order)
    from boxtree.fmm import drive_fmm

    collected = {}
    (pot,) = drive_fmm(trav, wrangler, (charges,), timing_data=collected)
    print(collected)
    assert collected
Пример #7
0
    def __call__(self, queue=None):
        """Build the tree and traversal over the points from ``_get_q_points``.

        :arg queue: command queue to use; a new one on ``self.cl_context`` is
            created when omitted.
        :returns: a ``BoxFMMGeometryData`` built from the context, the points,
            the weights from ``_get_q_weights``, the tree and its traversal.
        """
        if queue is None:
            queue = cl.CommandQueue(self.cl_context)

        from boxtree import TreeBuilder
        build_tree = TreeBuilder(self.cl_context)

        points = self._get_q_points(queue)

        # One less than n_q_points_per_cell**dim * 2**dim.
        box_capacity = (
            (self.n_q_points_per_cell**self.dim) * (2**self.dim) - 1)

        # The same point set serves as both particles and targets.
        tree, _ = build_tree(
            queue,
            particles=points,
            targets=points,
            bbox=self._bbox,
            max_particles_in_box=box_capacity,
            kind="adaptive-level-restricted")

        from boxtree.traversal import FMMTraversalBuilder
        build_trav = FMMTraversalBuilder(self.cl_context)
        trav, _ = build_trav(queue, tree)

        return BoxFMMGeometryData(
            self.cl_context,
            points,
            self._get_q_weights(queue),
            tree,
            trav)
Пример #8
0
def test_pyfmmlib_fmm(ctx_getter):
    """Compare a 2D Helmholtz FMM via pyfmmlib against direct evaluation.

    3000 sources and 1000 targets (shifted by ``[2, 0]``) are generated, the
    FMM potential is computed with ``Helmholtz2DExpansionWrangler`` and the
    relative l2 error against ``hpotgrad2dall_vec`` must be below 1e-5.

    :arg ctx_getter: zero-argument callable returning a PyOpenCL context.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 3000
    ntargets = 1000
    dims = 2
    dtype = np.float64

    helmholtz_k = 2

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = (p_normal(queue, ntargets, dims, dtype, seed=18) +
               np.array([2, 0]))

    # Host copies are needed for the direct pyfmmlib reference computation.
    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    # RanluxGenerator is deprecated in PyOpenCL; use the Philox generator,
    # which is constructed from a context rather than a queue.
    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()

    logger.info("computing direct (reference) result")

    from pyfmmlib import hpotgrad2dall_vec
    ref_pot, _, _ = hpotgrad2dall_vec(ifgrad=False,
                                      ifhess=False,
                                      sources=sources_host.T,
                                      charge=weights,
                                      targets=targets_host.T,
                                      zk=helmholtz_k)

    from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler
    wrangler = Helmholtz2DExpansionWrangler(trav.tree, helmholtz_k, nterms=10)

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(trav, wrangler, weights)

    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
    # Pass the value as a logging argument so formatting happens lazily.
    logger.info("relative l2 error: %g", rel_err)
    assert rel_err < 1e-5
Пример #9
0
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
                          who_has_extent, source_gen, target_gen, filter_kind,
                          well_sep_is_n_away, extent_norm,
                          from_sep_smaller_crit):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    All source weights are one and a "constant one" wrangler is used, so the
    potential at every (kept) target is compared against the sum of the
    weights.

    :arg ctx_getter: zero-argument callable returning a PyOpenCL context.
    :arg dims: dimension passed to the particle generators.
    :arg nsources_req: number of sources requested from *source_gen*.
    :arg ntargets_req: number of targets requested from *target_gen*, or
        *None* to let the tree builder use the sources as targets.
    :arg who_has_extent: string; sources get radii if it contains ``"s"``,
        targets if it contains ``"t"``.
    :arg source_gen: callable invoked as
        ``source_gen(queue, nsources_req, dims, dtype, seed=15)``.
    :arg target_gen: callable invoked as
        ``target_gen(queue, ntargets_req, dims, dtype, seed=16)``.
    :arg filter_kind: falsy for no target filtering, otherwise ``"user"`` or
        ``"tree"``; any other truthy value raises :class:`ValueError`.
    :arg well_sep_is_n_away: forwarded to ``FMMTraversalBuilder``.
    :arg extent_norm: forwarded to the tree builder.
    :arg from_sep_smaller_crit: forwarded to ``FMMTraversalBuilder``.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    # The generators may need optional packages; a missing one skips the test.
    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        # The generator decides the final count, so read it back.
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                    "generation")

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)
    # Radii are 2 raised to a uniform draw made with a=-10, b=0.
    if sources_have_extent:
        source_radii = 2**rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    else:
        source_radii = None

    # NOTE(review): ntargets is None when ntargets_req is None; presumably
    # the parametrization never combines that with target extents -- confirm.
    if targets_have_extent:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 source_radii=source_radii,
                 target_radii=target_radii,
                 debug=True,
                 stick_out_factor=0.25,
                 extent_norm=extent_norm)
    # Disabled debugging aid: plot the tree.
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx,
                                 well_sep_is_n_away=well_sep_is_n_away,
                                 from_sep_smaller_crit=from_sep_smaller_crit)
    trav, _ = tbuild(queue, tree, debug=True)

    # With extents present the close lists are merged; the unmerged traversal
    # is kept only for the (disabled) plotting code further down.
    if who_has_extent:
        pre_merge_trav = trav
        trav = trav.merge_close_lists(queue)

    #weights = np.random.randn(nsources)
    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    # The wrangler and driver below operate on the host copies.
    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if who_has_extent:
        pre_merge_host_trav = pre_merge_trav.get(queue=queue)

    from boxtree.tree import ParticleListFilter
    plfilt = ParticleListFilter(ctx)

    if filter_kind:
        # Random per-target flags: int32 draws with a=0, b=2, cast to int8.
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
                .astype(np.int8)
        if filter_kind == "user":
            filtered_targets = plfilt.filter_target_lists_in_user_order(
                queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            filtered_targets = plfilt.filter_target_lists_in_tree_order(
                queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)
        # No filtering: every target is flagged as kept.
        flags = cl.array.empty(queue, ntargets or nsources, dtype=np.int8)
        flags.fill(1)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
            wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    # When filtering, only compare the potentials at flagged targets.
    if filter_kind:
        pot = pot[flags.get() > 0]

    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    # {{{ build, evaluate matrix (and identify incorrect interactions)

    # Disabled debugging aid: assemble the full interaction matrix one unit
    # vector at a time and plot the boxes of entries that are not one.
    if 0 and not good:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 0:
            pt.imshow(mat)
            pt.colorbar()
            pt.show()

        incorrect_tgts, incorrect_srcs = np.where(mat != 1)

        if 1 and len(incorrect_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_incorrect_tgts = \
                    host_tree.indices_to_tree_target_order(incorrect_tgts)
            tree_order_incorrect_srcs = \
                    host_tree.indices_to_tree_source_order(incorrect_srcs)

            src_boxes = [
                host_tree.find_box_nr_for_source(i)
                for i in tree_order_incorrect_srcs
            ]
            tgt_boxes = [
                host_tree.find_box_nr_for_target(i)
                for i in tree_order_incorrect_tgts
            ]
            print(src_boxes)
            print(tgt_boxes)

            # plot all sources/targets
            if 0:
                pt.plot(host_tree.targets[0],
                        host_tree.targets[1],
                        "v",
                        alpha=0.9)
                pt.plot(host_tree.sources[0],
                        host_tree.sources[1],
                        "gx",
                        alpha=0.9)

            # plot offending sources/targets
            if 0:
                pt.plot(host_tree.targets[0][tree_order_incorrect_tgts],
                        host_tree.targets[1][tree_order_incorrect_tgts], "rv")
                pt.plot(host_tree.sources[0][tree_order_incorrect_srcs],
                        host_tree.sources[1][tree_order_incorrect_srcs], "go")
            pt.gca().set_aspect("equal")

            from boxtree.visualization import draw_box_lists
            draw_box_lists(
                plotter, pre_merge_host_trav if who_has_extent else host_trav,
                22)
            # from boxtree.visualization import draw_same_level_non_well_sep_boxes
            # draw_same_level_non_well_sep_boxes(plotter, host_trav, 2)

            pt.show()

    # }}}

    # Disabled debugging aid: plot the per-target deviation.
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    # Disabled debugging aid: mark targets deviating by 1e-3 or more.
    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
            host_tree.targets[0][flags.get() > 0],
            host_tree.targets[1][flags.get() > 0],
        ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
            filt_targets[0][bad],
            filt_targets[1][bad],
        ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
Пример #10
0
def test_pyfmmlib_fmm(ctx_getter, dims, use_dipoles, helmholtz_k):
    """Check the pyfmmlib-backed FMM against direct evaluations.

    The FMM potential for 3000 sources and 1000 shifted targets is compared,
    in the relative max-norm with tolerance 1e-5, against pyfmmlib's direct
    routine and, if sumpy can be imported, against sumpy's ``P2P``.

    :arg ctx_getter: zero-argument callable returning a PyOpenCL context.
    :arg dims: spatial dimension; 3 selects the ``fld`` routines, anything
        else the ``grad`` ones.
    :arg use_dipoles: if true, random dipole vectors are used as well.
    :arg helmholtz_k: Helmholtz parameter; a falsy value selects the Laplace
        kernel on the sumpy side.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 3000
    ntargets = 1000
    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = (p_normal(queue, ntargets, dims, dtype, seed=18) +
               np.array([2, 0, 0])[:dims])

    # Host copies are needed for the direct pyfmmlib reference computation.
    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()

    if use_dipoles:
        np.random.seed(13)
        dipole_vec = np.random.randn(dims, nsources)
    else:
        dipole_vec = None

    if dims == 2 and helmholtz_k == 0:
        base_nterms = 20
    else:
        base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        result = base_nterms

        if lev < 3 and helmholtz_k:
            # exercise order-varies-by-level capability
            result += 5

        if use_dipoles:
            result += 1

        return result

    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    wrangler = FMMLibExpansionWrangler(trav.tree,
                                       helmholtz_k,
                                       fmm_level_to_nterms=fmm_level_to_nterms,
                                       dipole_vec=dipole_vec)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    pot = drive_fmm(trav, wrangler, weights, timing_data=timing_data)
    print(timing_data)
    assert timing_data

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")

    # The routine name is assembled from the equation letter, "fld"/"grad",
    # the dimension and an optional "_dp" suffix for dipoles.
    import pyfmmlib
    fmmlib_routine = getattr(
        pyfmmlib, "%spot%s%ddall%s_vec" %
        (wrangler.eqn_letter, "fld" if dims == 3 else "grad", dims,
         "_dp" if use_dipoles else ""))

    kwargs = {}
    if dims == 3:
        kwargs["iffld"] = False
    else:
        kwargs["ifgrad"] = False
        kwargs["ifhess"] = False

    if use_dipoles:
        if helmholtz_k == 0 and dims == 2:
            # In this case the dipole is passed as one complex strength.
            kwargs["dipstr"] = -weights * (dipole_vec[0] + 1j * dipole_vec[1])
        else:
            kwargs["dipstr"] = weights
            kwargs["dipvec"] = dipole_vec
    else:
        kwargs["charge"] = weights
    if helmholtz_k:
        kwargs["zk"] = helmholtz_k

    ref_pot = wrangler.finalize_potentials(
        fmmlib_routine(sources=sources_host.T,
                       targets=targets_host.T,
                       **kwargs)[0])

    # The norms use ord=np.inf, so report the error as a max-norm error.
    rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
    logger.info("relative l-infinity error vs fmmlib direct: %g", rel_err)
    assert rel_err < 1e-5, rel_err

    # }}}

    # {{{ check against sumpy

    try:
        import sumpy  # noqa
    except ImportError:
        have_sumpy = False
        from warnings import warn
        warn("sumpy unavailable: cannot compute independent reference "
             "values for pyfmmlib")
    else:
        have_sumpy = True

    if have_sumpy:
        from sumpy.kernel import (LaplaceKernel, HelmholtzKernel,
                                  DirectionalSourceDerivative)
        from sumpy.p2p import P2P

        sumpy_extra_kwargs = {}
        if helmholtz_k:
            knl = HelmholtzKernel(dims)
            sumpy_extra_kwargs["k"] = helmholtz_k
        else:
            knl = LaplaceKernel(dims)

        if use_dipoles:
            knl = DirectionalSourceDerivative(knl)
            sumpy_extra_kwargs["src_derivative_dir"] = dipole_vec

        p2p = P2P(ctx, [knl], exclude_self=False)

        evt, (sumpy_ref_pot, ) = p2p(queue,
                                     targets,
                                     sources, [weights],
                                     out_host=True,
                                     **sumpy_extra_kwargs)

        sumpy_rel_err = (la.norm(pot - sumpy_ref_pot, np.inf) /
                         la.norm(sumpy_ref_pot, np.inf))

        logger.info("relative l-infinity error vs sumpy direct: %g",
                    sumpy_rel_err)
        assert sumpy_rel_err < 1e-5, sumpy_rel_err

    # }}}
Пример #11
0
def test_pyfmmlib_numerical_stability(ctx_factory, dims, helmholtz_k, order):
    """Run the pyfmmlib FMM on a deep tree and bound its max-norm error.

    The potential must contain no NaNs and its relative max-norm error
    against ``get_fmmlib_ref_pot`` must stay below ``(1/2)**(1 + order)`` in
    2D and ``(3/4)**(1 + order)`` otherwise.

    :arg ctx_factory: zero-argument callable returning a PyOpenCL context.
    :arg dims: number of coordinate rows kept from the 3-row particle array.
    :arg helmholtz_k: Helmholtz parameter passed to the wrangler.
    :arg order: expansion order used on every level.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    nsources = 30
    dtype = np.float64

    # The input particles are arranged with geometrically increasing/decreasing
    # spacing along a line, to build a deep tree that stress-tests the
    # translations.
    particle_line = np.array([2**-i for i in range(nsources // 2)],
                             dtype=dtype)
    particle_line = np.hstack([particle_line, 3 - particle_line])
    zero = np.zeros(nsources, dtype=dtype)

    sources = np.vstack([particle_line, zero, zero])[:dims]

    # Targets are the sources scaled by a factor of 1.001.
    targets = sources * (1 + 1e-3)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=2,
                 debug=True)

    # Make sure the geometry really produced a deep tree.
    assert tree.nlevels >= 15

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)
    weights = np.ones_like(sources[0])

    from boxtree.pyfmmlib_integration import (FMMLibExpansionWrangler,
                                              FMMLibRotationData)

    def fmm_level_to_nterms(tree, lev):
        return order

    wrangler = FMMLibExpansionWrangler(trav.tree,
                                       helmholtz_k,
                                       fmm_level_to_nterms=fmm_level_to_nterms,
                                       rotation_data=FMMLibRotationData(
                                           queue, trav))

    from boxtree.fmm import drive_fmm

    pot = drive_fmm(trav, wrangler, (weights, ))
    assert not np.isnan(pot).any()

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")

    ref_pot = get_fmmlib_ref_pot(wrangler, weights, sources, targets,
                                 helmholtz_k)

    # The norms use ord=np.inf, so report the error as a max-norm error.
    rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
    logger.info("relative l-infinity error vs fmmlib direct: %g", rel_err)

    if dims == 2:
        error_bound = (1 / 2)**(1 + order)
    else:
        error_bound = (3 / 4)**(1 + order)

    assert rel_err < error_bound, rel_err

    # }}}
Пример #12
0
def test_fmm_with_optimized_3d_m2l(ctx_factory, nsrcntgts, helmholtz_k,
                                   well_sep_is_n_away):
    """Compare 3D pyfmmlib FMM results with and without rotation data.

    Two wranglers are run on the same traversal, one of them constructed with
    ``FMMLibRotationData``. Their potentials must agree to 1e-13 (absolute
    and relative); the multipole-to-local timings are printed when available.

    :arg ctx_factory: zero-argument callable returning a PyOpenCL context.
    :arg nsrcntgts: combined particle count; half are sources, half targets.
    :arg helmholtz_k: Helmholtz parameter passed to both wranglers.
    :arg well_sep_is_n_away: accepted but not used in this function body.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ndim = 3

    context = ctx_factory()
    cq = cl.CommandQueue(context)

    half = nsrcntgts // 2
    n_src = half
    n_tgt = half
    real_dtype = np.float64

    src_particles = p_normal(cq, n_src, ndim, real_dtype, seed=15)
    shift = np.array([2, 0, 0])[:ndim]
    tgt_particles = p_normal(cq, n_tgt, ndim, real_dtype, seed=18) + shift

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    dev_tree, _ = build_tree(
        cq,
        src_particles,
        targets=tgt_particles,
        max_particles_in_box=30,
        debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_trav = FMMTraversalBuilder(context)
    dev_trav, _ = build_trav(cq, dev_tree, debug=True)

    host_trav = dev_trav.get(queue=cq)

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(cq.context, seed=20)

    charges = generator.uniform(cq, n_src, dtype=np.float64).get()

    base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        # exercise order-varies-by-level capability
        if lev < 3 and helmholtz_k:
            return base_nterms + 5
        return base_nterms

    from boxtree.pyfmmlib_integration import (FMMLibExpansionWrangler,
                                              FMMLibRotationData)

    plain_wrangler = FMMLibExpansionWrangler(
        host_trav.tree, helmholtz_k, fmm_level_to_nterms=fmm_level_to_nterms)

    rotating_wrangler = FMMLibExpansionWrangler(
        host_trav.tree,
        helmholtz_k,
        fmm_level_to_nterms=fmm_level_to_nterms,
        rotation_data=FMMLibRotationData(cq, host_trav))

    from boxtree.fmm import drive_fmm

    plain_timings = {}
    plain_pot = drive_fmm(
        host_trav, plain_wrangler, (charges, ), timing_data=plain_timings)

    rotating_timings = {}
    rotating_pot = drive_fmm(
        host_trav, rotating_wrangler, (charges, ),
        timing_data=rotating_timings)

    # Report the M2L stage time of each run, if one was recorded.
    reports = (
        ("Baseline M2L time : %#.4g s", plain_timings),
        ("Optimized M2L time: %#.4g s", rotating_timings),
    )
    for template, timings in reports:
        elapsed = timings["multipole_to_local"]["process_elapsed"]
        if elapsed is not None:
            print(template % elapsed)

    assert np.allclose(plain_pot, rotating_pot, atol=1e-13, rtol=1e-13)
Пример #13
0
def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
    """Verify convergence in expansion order of the sumpy-backed FMM.

    For each tested order the FMM potential is compared, in the relative
    max-norm, against a direct ``P2P`` evaluation; the resulting data points
    are handed to a ``PConvergenceVerifier``.

    :arg ctx_getter: zero-argument callable returning a PyOpenCL context.
    :arg knl: kernel object; its ``dim`` attribute sets the dimension.
    :arg local_expn_class: local expansion class, called with *knl* bound.
    :arg mpole_expn_class: multipole expansion class, called with *knl* bound.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 1000
    ntargets = 300
    dtype = np.float64

    from boxtree.tools import (make_normal_particle_array as p_normal)

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
    if 1:
        # Targets are shifted by 0.1 along the first axis.
        offset = np.zeros(knl.dim)
        offset[0] = 0.1

        targets = (p_normal(queue, ntargets, knl.dim, dtype, seed=18) + offset)

        del offset
    else:
        # Disabled alternative: targets on a regular plotting grid.
        from sumpy.visualization import FieldPlotter
        fp = FieldPlotter(np.array([0.5, 0]), extent=3, npoints=200)
        from pytools.obj_array import make_obj_array
        targets = make_obj_array([fp.points[i] for i in range(knl.dim)])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    # {{{ plot tree

    # Disabled debugging aid.
    if 0:
        host_tree = tree.get()
        host_trav = trav.get()

        if 1:
            print("src_box", host_tree.find_box_nr_for_source(403))
            print("tgt_box", host_tree.find_box_nr_for_target(28))
            print(list(host_trav.target_or_target_parent_boxes).index(37))
            print(host_trav.get_box_list("sep_bigger", 22))

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(host_tree)
        plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        plotter.set_bounding_box()
        plotter.draw_box_numbers()

        import matplotlib.pyplot as pt
        pt.show()

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    logger.info("computing direct (reference) result")

    from pytools.convergence import PConvergenceVerifier

    pconv_verifier = PConvergenceVerifier()

    # Defaults; the Helmholtz and Yukawa kernels below override the dtype,
    # add their kernel parameter and pick different orders.
    extra_kwargs = {}
    dtype = np.float64
    order_values = [1, 2, 3]
    if isinstance(knl, HelmholtzKernel):
        extra_kwargs["k"] = 0.05
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, H2DLocalExpansion):
            order_values = [10, 12]

    elif isinstance(knl, YukawaKernel):
        extra_kwargs["lam"] = 2
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, Y2DLocalExpansion):
            order_values = [10, 12]

    from functools import partial
    for order in order_values:
        out_kernels = [knl]

        from sumpy.fmm import SumpyExpansionWranglerCodeContainer
        wcc = SumpyExpansionWranglerCodeContainer(
            ctx, partial(mpole_expn_class, knl),
            partial(local_expn_class, knl), out_kernels)
        # The lambda reads ``order`` from the enclosing loop; the wrangler is
        # only used within this iteration.
        wrangler = wcc.get_wrangler(
            queue,
            tree,
            dtype,
            fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
            kernel_extra_kwargs=extra_kwargs)

        from boxtree.fmm import drive_fmm

        pot, = drive_fmm(trav, wrangler, weights)

        from sumpy import P2P
        p2p = P2P(ctx, out_kernels, exclude_self=False)
        evt, (ref_pot, ) = p2p(queue, targets, sources, (weights, ),
                               **extra_kwargs)

        pot = pot.get()
        ref_pot = ref_pot.get()

        # The norms use ord=np.inf, so report the error as a max-norm error.
        rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
        logger.info("order %d -> relative l-infinity error: %g",
                    order, rel_err)

        pconv_verifier.add_data_point(order, rel_err)

    print(pconv_verifier)
    pconv_verifier()
Пример #14
0
# Example script fragment: draw random particles, build a boxtree tree and FMM
# traversal for them, then plot the particles together with the tree boxes.
# It relies on `queue`, `ctx`, `nparticles`, `dims` and `np` being defined
# before this point (not visible in this fragment).
# NOTE(review): the other snippets in this file use
# PhiloxGenerator(queue.context, ...) -- confirm RanluxGenerator is still
# provided by the pyopencl version this example targets.
from pyopencl.clrandom import RanluxGenerator
rng = RanluxGenerator(queue, seed=15)

# One array of normally distributed coordinates per axis, bundled into an
# object array.
from pytools.obj_array import make_obj_array
particles = make_obj_array(
    [rng.normal(queue, nparticles, dtype=np.float64) for i in range(dims)])

# -----------------------------------------------------------------------------
# build tree and traversals (lists)
# -----------------------------------------------------------------------------
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
# The builder returns a pair; only the tree (first element) is used here.
tree, _ = tb(queue, particles, max_particles_in_box=30)

from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(ctx)
# Likewise, only the first element of the traversal builder's result is kept.
trav, _ = tg(queue, tree)

# ENDEXAMPLE

# -----------------------------------------------------------------------------
# plot the tree
# -----------------------------------------------------------------------------

import matplotlib.pyplot as pt

# Only the first two coordinate axes are plotted.
pt.plot(particles[0].get(), particles[1].get(), "x")

from boxtree.visualization import TreePlotter
# The plotter is given the result of tree.get(queue=queue) rather than the
# tree object returned by the builder.
plotter = TreePlotter(tree.get(queue=queue))
plotter.draw_tree(fill=False, edgecolor="black")
Пример #15
0
def test_estimate_calibration_params(ctx_factory):
    """Exercise calibration-parameter estimation for both FMM cost models.

    Four problems of growing size are set up; for each one a tree and a
    traversal are built and an FMM is run with an ``FMMLibExpansionWrangler``
    while timing data is collected.  All cases but the last are then fed to
    ``_PythonFMMCostModel`` and ``FMMCostModel``, and the estimated
    calibration parameters are checked to be ``np.float64`` values.  When
    ``SUPPORTS_PROCESS_TIME`` is set, the two models must also produce equal
    parameters.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    """
    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    from boxtree.tools import make_normal_particle_array as p_normal
    from pyopencl.clrandom import PhiloxGenerator
    from boxtree import TreeBuilder
    from boxtree.traversal import FMMTraversalBuilder
    from boxtree.fmm import drive_fmm

    # (nsources, ntargets) for each test case
    problem_sizes = [(1000, 1000), (2000, 2000), (3000, 3000), (4000, 4000)]
    dims = 3
    dtype = np.float64

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    host_traversals = []
    device_traversals = []
    orders_per_case = []
    timings_per_case = []

    def fmm_level_to_nterms(tree, ilevel):
        # same expansion order on every level
        return 10

    for nsources, ntargets in problem_sizes:
        # {{{ Generate sources, targets and target_radii

        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        radius_rng = PhiloxGenerator(queue.context, seed=22)
        target_radii = radius_rng.uniform(
            queue, ntargets, a=0, b=0.05, dtype=dtype).get()

        # }}}

        # {{{ Generate tree and traversal

        build_tree = TreeBuilder(ctx)
        tree, _ = build_tree(
            queue, sources,
            targets=targets,
            target_radii=target_radii,
            stick_out_factor=0.15,
            max_particles_in_box=30,
            debug=True,
        )

        build_traversal = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
        trav_dev, _ = build_traversal(queue, tree, debug=True)
        trav = trav_dev.get(queue=queue)

        host_traversals.append(trav)
        device_traversals.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        orders_per_case.append(wrangler.level_nterms)

        # drive_fmm fills this dictionary with per-stage timing information
        timing_data = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, (src_weights,), timing_data=timing_data)

        timings_per_case.append(timing_data)

    time_field_name = (
        "process_elapsed" if SUPPORTS_PROCESS_TIME else "wall_elapsed")

    param_names = ("c_p2m", "c_m2m", "c_p2p", "c_m2l", "c_m2p", "c_p2l",
                   "c_l2l", "c_l2p")

    def test_params_sanity(test_params):
        for name in param_names:
            assert isinstance(test_params[name], np.float64)

    def test_params_equal(test_params1, test_params2):
        for name in param_names:
            assert test_params1[name] == test_params2[name]

    def calibrate(cost_model, get_unit_params, case_traversals):
        # Model every case except the last one with unit calibration
        # parameters, then fit the parameters against the measured timings
        # of those same cases.
        stage_costs = [
            cost_model.cost_per_stage(
                queue, case_trav, case_orders, get_unit_params())
            for case_trav, case_orders in zip(
                case_traversals[:-1], orders_per_case[:-1])
        ]
        return cost_model.estimate_calibration_params(
            stage_costs, timings_per_case[:-1],
            time_field_name=time_field_name)

    python_cost_model = _PythonFMMCostModel(make_pde_aware_translation_cost_model)
    python_params = calibrate(
        python_cost_model,
        _PythonFMMCostModel.get_unit_calibration_params,
        host_traversals)
    test_params_sanity(python_params)

    cl_cost_model = FMMCostModel(make_pde_aware_translation_cost_model)
    cl_params = calibrate(
        cl_cost_model,
        FMMCostModel.get_unit_calibration_params,
        device_traversals)
    test_params_sanity(cl_params)

    if SUPPORTS_PROCESS_TIME:
        test_params_equal(cl_params, python_params)
Пример #16
0
def test_cost_model_op_counts_agree_with_constantone_wrangler(
        ctx_factory, nsources, ntargets, dims, dtype):
    """Compare modeled operation counts with those reported by a real FMM run.

    An FMM is driven with a ``ConstantOneExpansionWrangler`` while collecting
    timing data; the ``"ops_elapsed"`` entry of every stage must equal what
    ``FMMCostModel`` (using ``OpCountingTranslationCostModel`` and unit
    calibration parameters) predicts for that stage.  Afterwards the per-box
    cost, aggregated over all boxes and combined with the
    ``"coarsen_multipoles"`` and ``"refine_locals"`` stage costs, must equal
    the summed ``"ops_elapsed"`` of all stages.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    :arg nsources: number of source particles.
    :arg ntargets: number of target particles.
    :arg dims: number of spatial dimensions.
    :arg dtype: dtype used for coordinates and target radii.
    """
    from boxtree.tools import make_normal_particle_array as p_normal
    from pyopencl.clrandom import PhiloxGenerator
    from boxtree import TreeBuilder
    from boxtree.traversal import FMMTraversalBuilder
    from boxtree.tools import ConstantOneExpansionWrangler
    from boxtree.fmm import drive_fmm

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # {{{ particles, tree and traversal

    sources = p_normal(queue, nsources, dims, dtype, seed=16)
    targets = p_normal(queue, ntargets, dims, dtype, seed=19)

    radius_rng = PhiloxGenerator(queue.context, seed=20)
    target_radii = radius_rng.uniform(
        queue, ntargets, a=0, b=0.04, dtype=dtype).get()

    build_tree = TreeBuilder(ctx)
    tree, _ = build_tree(
        queue, sources,
        targets=targets,
        target_radii=target_radii,
        stick_out_factor=0.15,
        max_particles_in_box=30,
        debug=True,
    )

    build_traversal = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
    trav_dev, _ = build_traversal(queue, tree, debug=True)
    trav = trav_dev.get(queue=queue)

    # }}}

    # {{{ run the FMM, collecting per-stage data

    wrangler = ConstantOneExpansionWrangler(trav.tree)

    timing_data = {}
    src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
    drive_fmm(trav, wrangler, (src_weights,), timing_data=timing_data)

    # }}}

    # {{{ Test per-stage cost

    cost_model = FMMCostModel(
        translation_cost_model_factory=OpCountingTranslationCostModel
    )

    # order 1 on every level
    level_to_order = np.array([1 for _ in range(tree.nlevels)])

    modeled_time = cost_model.cost_per_stage(
        queue, trav_dev, level_to_order,
        FMMCostModel.get_unit_calibration_params(),
    )

    mismatches = [
        (stage, stage_timing["ops_elapsed"], modeled_time[stage])
        for stage, stage_timing in timing_data.items()
        if stage_timing["ops_elapsed"] != modeled_time[stage]
    ]

    assert not mismatches, "\n".join(str(s) for s in mismatches)

    # }}}

    # {{{ Test per-box cost

    total_cost = 0.0
    for stage_timing in timing_data.values():
        total_cost += stage_timing["ops_elapsed"]

    per_box_cost = cost_model.cost_per_box(
        queue, trav_dev, level_to_order,
        FMMCostModel.get_unit_calibration_params(),
    )
    total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost)

    # The two upward/downward translation stages are added on top of the
    # aggregated per-box cost to recover the measured total.
    expected_total = (
        total_aggregate_cost
        + modeled_time["coarsen_multipoles"]
        + modeled_time["refine_locals"]
    )
    assert total_cost == expected_total

    # }}}
Пример #17
0
def test_tree_connectivity(ctx_getter, dims, sources_are_targets):
    """Check structural invariants of a tree and its FMM traversal lists.

    A tree is built from normally distributed particles, a traversal is
    generated for it, and both are transferred to the host. The following
    are then asserted:

    * parent/child links and box levels are consistent,
    * list 1 (``neighbor_source_boxes``) only contains boxes without children,
    * list 2 (``sep_siblings``) boxes are on the target box's level and at
      least a minimum distance away,
    * if sources and targets coincide, list 3 (``sep_smaller``) and list 4
      (``sep_bigger``) are duals of each other,
    * lists 3 and 4 satisfy their relative-level assumptions,
    * the ``level_start_*_box_nrs`` arrays delimit boxes of the right level.

    :arg ctx_getter: callable returning the OpenCL context to run on.
    :arg dims: number of spatial dimensions.
    :arg sources_are_targets: if true, no separate targets are passed to the
        tree builder (``targets=None``); otherwise a separate target particle
        set is generated.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    sources = make_normal_particle_array(queue, 1 * 10**5, dims, dtype)
    if sources_are_targets:
        targets = None
    else:
        targets = make_normal_particle_array(queue, 2 * 10**5, dims, dtype)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)
    tree, _ = tb(queue,
                 sources,
                 max_particles_in_box=30,
                 targets=targets,
                 debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tg = FMMTraversalBuilder(ctx)
    trav, _ = tg(queue, tree, debug=True)
    # From here on, work with the results of .get(queue=queue) so the checks
    # below can index the arrays with plain Python/numpy operations.
    tree = tree.get(queue=queue)
    trav = trav.get(queue=queue)

    levels = tree.box_levels
    # Transposed so that the first index is the box number.
    parents = tree.box_parent_ids.T
    children = tree.box_child_ids.T
    centers = tree.box_centers.T

    # {{{ parent and child relations, levels match up

    for ibox in range(1, tree.nboxes):
        # /!\ Not testing box 0, has no parents
        parent = parents[ibox]

        assert levels[parent] + 1 == levels[ibox]
        assert ibox in children[parent], ibox

    # }}}

    # Disabled debugging aid: flip to 1 to plot the tree with box numbers.
    if 0:
        import matplotlib.pyplot as pt
        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        plotter.draw_box_numbers()
        plotter.set_bounding_box()
        pt.show()

    # {{{ neighbor_source_boxes (list 1) consists of source boxes

    for itgt_box, ibox in enumerate(trav.target_boxes):
        start, end = trav.neighbor_source_boxes_starts[itgt_box:itgt_box + 2]
        nbl = trav.neighbor_source_boxes_lists[start:end]

        if sources_are_targets:
            # every target box must be in its own list 1
            assert ibox in nbl

        for jbox in nbl:
            # all child ids being zero is what this test treats as "no
            # children"
            assert (0 == children[jbox]).all(), (ibox, jbox, children[jbox])

    logger.info("list 1 consists of source boxes")

    # }}}

    # {{{ separated siblings (list 2) are actually separated

    for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes):
        start, end = trav.sep_siblings_starts[itgt_box:itgt_box + 2]
        seps = trav.sep_siblings_lists[start:end]

        assert (levels[seps] == levels[tgt_ibox]).all()

        # 2.5 box radii, where a radius is half of the box size on this
        # level (root_extent * 2**-level)
        mindist = 2.5 * 0.5 * 2**-int(levels[tgt_ibox]) * tree.root_extent

        icenter = centers[tgt_ibox]
        for jbox in seps:
            dist = la.norm(centers[jbox] - icenter)
            assert dist > mindist, (dist, mindist)

    logger.info("separated siblings (list 2) are actually separated")

    # }}}

    if sources_are_targets:
        # {{{ sep_{smaller,bigger} are duals of each other

        # With this assertion holding, a box number can be used directly as
        # an index into sep_bigger_starts below.
        assert (trav.target_or_target_parent_boxes == np.arange(
            tree.nboxes)).all()

        # {{{ list 3 <= list 4
        for itarget_box, ibox in enumerate(trav.target_boxes):

            for ssn in trav.sep_smaller_by_level:
                start, end = ssn.starts[itarget_box:itarget_box + 2]

                for jbox in ssn.lists[start:end]:
                    rstart, rend = trav.sep_bigger_starts[jbox:jbox + 2]

                    # jbox is in ibox's list 3, so ibox must be in jbox's
                    # list 4
                    assert ibox in trav.sep_bigger_lists[rstart:rend], (ibox,
                                                                        jbox)

        # }}}

        # {{{ list 4 <= list 3

        # Map from global box number to index into target_boxes; -1 marks
        # boxes that are not target boxes.
        box_to_target_box_index = np.empty(tree.nboxes, tree.box_id_dtype)
        box_to_target_box_index.fill(-1)
        box_to_target_box_index[trav.target_boxes] = np.arange(
            len(trav.target_boxes), dtype=tree.box_id_dtype)

        assert (trav.source_boxes == trav.target_boxes).all()
        assert (trav.target_or_target_parent_boxes == np.arange(
            tree.nboxes, dtype=tree.box_id_dtype)).all()

        for ibox in range(tree.nboxes):
            start, end = trav.sep_bigger_starts[ibox:ibox + 2]

            for jbox in trav.sep_bigger_lists[start:end]:
                # In principle, entries of sep_bigger_lists are
                # source boxes. In this special case, source and target boxes
                # are the same thing (i.e. leaves--see assertion above), so we
                # may treat them as targets anyhow.

                jtgt_box = box_to_target_box_index[jbox]
                assert jtgt_box != -1

                # ibox must show up in jbox's list 3 on at least one level
                good = False

                for ssn in trav.sep_smaller_by_level:
                    rstart, rend = ssn.starts[jtgt_box:jtgt_box + 2]
                    good = good or ibox in ssn.lists[rstart:rend]

                if not good:
                    # Show the offending pair before the assertion below
                    # fails: ibox in green, jbox in red.
                    from boxtree.visualization import TreePlotter
                    plotter = TreePlotter(tree)
                    plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
                    plotter.set_bounding_box()

                    plotter.draw_box(ibox, facecolor='green', alpha=0.5)
                    plotter.draw_box(jbox, facecolor='red', alpha=0.5)

                    import matplotlib.pyplot as pt
                    pt.gca().set_aspect("equal")
                    pt.show()

                # This assertion failing means that ibox's list 4 contains a box
                # 'jbox' whose list 3 does not contain ibox.
                assert good, (ibox, jbox)

        # }}}

        logger.info("list 3, 4 are duals")

        # }}}

    # {{{ sep_smaller satisfies relative level assumption

    for itarget_box, ibox in enumerate(trav.target_boxes):
        for ssn in trav.sep_smaller_by_level:
            start, end = ssn.starts[itarget_box:itarget_box + 2]

            for jbox in ssn.lists[start:end]:
                # list 3 entries are on a strictly deeper level than ibox
                assert levels[ibox] < levels[jbox]

    logger.info("list 3 satisfies relative level assumption")

    # }}}

    # {{{ sep_bigger satisfies relative level assumption

    for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes):
        start, end = trav.sep_bigger_starts[itgt_box:itgt_box + 2]

        for jbox in trav.sep_bigger_lists[start:end]:
            # list 4 entries are on a strictly shallower level than tgt_ibox
            assert levels[tgt_ibox] > levels[jbox]

    logger.info("list 4 satisfies relative level assumption")

    # }}}

    # {{{ level_start_*_box_nrs lists make sense

    # Each pair is (name of the level-start array on trav, box list it
    # indexes into).
    for name, ref_array in [("level_start_source_box_nrs", trav.source_boxes),
                            ("level_start_source_parent_box_nrs",
                             trav.source_parent_boxes),
                            ("level_start_target_box_nrs", trav.target_boxes),
                            ("level_start_target_or_target_parent_box_nrs",
                             trav.target_or_target_parent_boxes)]:
        level_starts = getattr(trav, name)
        for lev in range(tree.nlevels):
            start, stop = level_starts[lev:lev + 2]

            box_nrs = ref_array[start:stop]

            # every box in the slice for level `lev` must be on that level
            assert (tree.box_levels[box_nrs] == lev).all(), name
Пример #18
0
def main():
    """Run a 3D Laplace volume-potential example and print its error.

    The steps are, in order:

    1. set up the problem parameters and the symbolic source/solution
       expressions,
    2. generate quadrature points and weights on a box mesh (optionally
       adaptively refined) and write the mesh out via gmsh,
    3. evaluate the source field at the quadrature points,
    4. build a tree and an FMM traversal over the quadrature points,
    5. obtain the near-field interaction table (downloading a precomputed
       table file if enabled and not already present),
    6. set up the sumpy expansions and the expansion wrangler,
    7. run the volume FMM and print the elapsed time,
    8. compare the computed potential at the origin with the exact solution.

    Several further post-processing sections (tree plot, direct evaluation,
    VTK output) are present but disabled with ``if 0:``.

    The function takes no arguments and returns nothing; all results are
    printed or logged.
    """

    print("*************************")
    print("* Setting up...")
    print("*************************")

    dim = 3
    # download precomputation results for the 3D Laplace kernel
    download_table = True
    table_filename = "nft_laplace3d.hdf5"

    logger.info("Using table cache: " + table_filename)

    q_order = 7  # quadrature order
    n_levels = 5
    use_multilevel_table = False

    # parameters of the (currently disabled) adaptive refinement loop
    adaptive_mesh = False
    n_refinement_loops = 100
    refined_n_cells = 5e5
    rratio_top = 0.2
    rratio_bot = 0.5

    dtype = np.float64

    m_order = 10  # multipole order
    force_direct_evaluation = False

    logger.info("Multipole order = " + str(m_order))
    logger.info("Quad order = " + str(q_order))
    logger.info("N_levels = " + str(n_levels))

    # a solution that is nearly zero at the boundary: on the faces of the
    # box [-0.5, 0.5]^3 used below, norm2 >= 0.25, so
    # exp(-alpha * norm2) <= exp(-80 * 0.25) = exp(-20) ~ 2.1e-9
    alpha = 80
    x = pmbl.var("x")
    y = pmbl.var("y")
    z = pmbl.var("z")
    expp = pmbl.var("exp")

    norm2 = x**2 + y**2 + z**2
    # source = -Laplacian(solution) for solution = exp(-alpha * norm2) in 3D
    source_expr = -(4 * alpha**2 * norm2 - 6 * alpha) * expp(-alpha * norm2)
    solu_expr = expp(-alpha * norm2)

    logger.info("Source expr: " + str(source_expr))
    logger.info("Solu expr: " + str(solu_expr))

    # bounding box
    a = -0.5
    b = 0.5
    root_table_source_extent = 2

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # logger.info("Summary of params: " + get_param_summary())
    source_eval = Eval(dim, source_expr, [x, y, z])

    # {{{ generate quad points

    import volumential.meshgen as mg

    # Show meshgen info
    mg.greet()

    mesh = mg.MeshGen3D(q_order, n_levels, a, b, queue=queue)
    if not adaptive_mesh:
        mesh.print_info()
        q_points = mesh.get_q_points()
        q_weights = mesh.get_q_weights()
    else:
        # Refine until the mesh has enough active cells or the loop budget
        # is used up. The refinement criterion is |cell measure * source
        # value at the cell center|.
        iloop = -1
        while mesh.n_active_cells() < refined_n_cells:
            iloop += 1
            cell_centers = mesh.get_cell_centers()
            cell_measures = mesh.get_cell_measures()
            density_vals = source_eval(
                queue,
                np.array([[center[d] for center in cell_centers]
                          for d in range(dim)]))
            crtr = np.abs(cell_measures * density_vals)
            mesh.update_mesh(crtr, rratio_top, rratio_bot)
            if iloop > n_refinement_loops:
                print("Max number of refinement loops reached.")
                break

        mesh.print_info()
        q_points = mesh.get_q_points()
        q_weights = mesh.get_q_weights()

    if 1:
        # Writing the gmsh file is best-effort: a failure is reported but
        # does not stop the computation.
        try:
            mesh.generate_gmsh("box_grid.msh")
        except Exception as e:
            print(e)

        legacy_msh_file = True
        if legacy_msh_file:
            import os

            os.system("gmsh box_grid.msh convert_grid -")

    assert len(q_points) == len(q_weights)
    assert q_points.shape[1] == dim

    # (npoints, dim) -> (dim, npoints), one contiguous row per axis
    q_points = np.ascontiguousarray(np.transpose(q_points))

    from pytools.obj_array import make_obj_array

    q_points = make_obj_array(
        [cl.array.to_device(queue, q_points[i]) for i in range(dim)])

    q_weights = cl.array.to_device(queue, q_weights)

    # }}}

    # {{{ discretize the source field

    logger.info("discretizing source field")
    source_vals = cl.array.to_device(
        queue,
        source_eval(queue, np.array([coords.get() for coords in q_points])))

    # particle_weight = source_val * q_weight

    # }}} End discretize the source field

    # {{{ build tree and traversals

    from boxtree.tools import AXIS_NAMES

    axis_names = AXIS_NAMES[:dim]

    from pytools import single_valued

    coord_dtype = single_valued(coord.dtype for coord in q_points)
    from boxtree.bounding_box import make_bounding_box_dtype

    bbox_type, _ = make_bounding_box_dtype(ctx.devices[0], dim, coord_dtype)

    # Prescribe the bounding box [a, b]^dim instead of letting the tree
    # builder determine one from the particles.
    bbox = np.empty(1, bbox_type)
    for ax in axis_names:
        bbox["min_" + ax] = a
        bbox["max_" + ax] = b

    # tune max_particles_in_box to reconstruct the mesh
    # TODO: use points from FieldPlotter as target points for better
    # visuals
    print("building tree")
    from boxtree import TreeBuilder

    tb = TreeBuilder(ctx)
    tree, _ = tb(
        queue,
        particles=q_points,
        targets=q_points,
        bbox=bbox,
        max_particles_in_box=q_order**3 * 8 - 1,
        kind="adaptive-level-restricted",
    )

    from boxtree.traversal import FMMTraversalBuilder

    tg = FMMTraversalBuilder(ctx)
    trav, _ = tg(queue, tree)

    # }}} End build tree and traversals

    # {{{ build near field potential table

    from volumential.table_manager import NearFieldInteractionTableManager
    import os

    if download_table and (not os.path.isfile(table_filename)):
        import json
        with open("table_urls.json", 'r') as fp:
            urls = json.load(fp)

        print("Downloading table from %s" % urls['Laplace3D'])
        import subprocess
        # Name the output file explicitly with -O. Every non-option
        # argument of wget is a URL, so passing the file name positionally
        # would make wget try to fetch it as a second URL, and the table
        # would only end up under table_filename if the URL's last path
        # component happened to match.
        wget_status = subprocess.call(
            ["wget", "-q", "-O", table_filename, urls['Laplace3D']])
        if wget_status != 0 and os.path.isfile(table_filename):
            # With -O a failed download can leave an empty or truncated
            # file behind; remove it so that the table manager below starts
            # from a missing file, exactly as it would have without a
            # download.
            os.remove(table_filename)

    tm = NearFieldInteractionTableManager(table_filename,
                                          root_extent=root_table_source_extent,
                                          queue=queue)

    if use_multilevel_table:
        logger.info("Using multilevel tables")
        # (b - a) must be an integer multiple of the root table extent
        assert (abs(
            int((b - a) / root_table_source_extent) *
            root_table_source_extent - (b - a)) < 1e-15)
        nftable = []
        for lev in range(0, tree.nlevels + 1):
            print("Getting table at level", lev)
            # NOTE: this rebinds `tb` (previously the TreeBuilder), which is
            # not used again afterwards.
            tb, _ = tm.get_table(
                dim,
                "Laplace",
                q_order,
                source_box_level=lev,
                compute_method="DrosteSum",
                queue=queue,
                n_brick_quad_points=120,
                adaptive_level=False,
                use_symmetry=True,
                alpha=0,
                n_levels=1,
            )
            nftable.append(tb)

        print("Using table list of length", len(nftable))

    else:
        logger.info("Using single level table")
        force_recompute = False
        # 15 levels are sufficient (the inner most brick is 1e-15**3 in volume)
        nftable, _ = tm.get_table(
            dim,
            "Laplace",
            q_order,
            force_recompute=force_recompute,
            compute_method="DrosteSum",
            queue=queue,
            n_brick_quad_points=120,
            adaptive_level=False,
            use_symmetry=True,
            alpha=0,
            n_levels=1,
        )

    # }}} End build near field potential table

    # {{{ sumpy expansion for laplace kernel

    from sumpy.expansion import DefaultExpansionFactory
    from sumpy.kernel import LaplaceKernel

    knl = LaplaceKernel(dim)
    out_kernels = [knl]

    expn_factory = DefaultExpansionFactory()
    local_expn_class = expn_factory.get_local_expansion_class(knl)
    mpole_expn_class = expn_factory.get_multipole_expansion_class(knl)

    exclude_self = True
    from volumential.expansion_wrangler_fpnd import (
        FPNDExpansionWrangler, FPNDExpansionWranglerCodeContainer)

    wcc = FPNDExpansionWranglerCodeContainer(
        ctx,
        partial(mpole_expn_class, knl),
        partial(local_expn_class, knl),
        out_kernels,
        exclude_self=exclude_self,
    )

    if exclude_self:
        # sources and targets are the same points, so target i maps to
        # source i
        target_to_source = np.arange(tree.ntargets, dtype=np.int32)
        self_extra_kwargs = {"target_to_source": target_to_source}
    else:
        self_extra_kwargs = {}

    wrangler = FPNDExpansionWrangler(
        code_container=wcc,
        queue=queue,
        tree=tree,
        near_field_table=nftable,
        dtype=dtype,
        # same expansion order on every level
        fmm_level_to_order=lambda kernel, kernel_args, tree, lev: m_order,
        quad_order=q_order,
        self_extra_kwargs=self_extra_kwargs,
    )

    # }}} End sumpy expansion for laplace kernel

    print("*************************")
    print("* Performing FMM ...")
    print("*************************")

    # {{{ conduct fmm computation

    from volumential.volume_fmm import drive_volume_fmm

    import time
    # finish outstanding device work so it is not attributed to the FMM
    queue.finish()

    t0 = time.time()

    pot, = drive_volume_fmm(trav,
                            wrangler,
                            source_vals * q_weights,
                            source_vals,
                            direct_evaluation=force_direct_evaluation,
                            list1_only=False)

    t1 = time.time()

    print("Finished in %.2f seconds." % (t1 - t0))
    print("(%e points per second)" % (len(q_weights) / (t1 - t0)))

    # }}} End conduct fmm computation

    print("*************************")
    print("* Postprocessing ...")
    print("*************************")

    # {{{ postprocess and plot

    # print(pot)

    solu_eval = Eval(dim, solu_expr, [x, y, z])
    # x = q_points[0].get()
    # y = q_points[1].get()
    # z = q_points[2].get()
    # The error is measured at a single test point, the origin.
    test_x = np.array([0.0])
    test_y = np.array([0.0])
    test_z = np.array([0.0])
    test_nodes = make_obj_array(
        # get() first for CL compatibility issues
        [
            cl.array.to_device(queue, test_x),
            cl.array.to_device(queue, test_y),
            cl.array.to_device(queue, test_z),
        ])

    from volumential.volume_fmm import interpolate_volume_potential

    # ze: exact solution, zs: interpolated FMM potential
    ze = solu_eval(queue, np.array([test_x, test_y, test_z]))
    zs = interpolate_volume_potential(test_nodes, trav, wrangler, pot).get()

    print_error = True
    if print_error:
        err = np.max(np.abs(ze - zs))
        print("Error =", err)

    # Boxtree
    if 0:
        import matplotlib.pyplot as plt

        if dim == 2:
            plt.plot(q_points[0].get(), q_points[1].get(), ".")

        from boxtree.visualization import TreePlotter

        plotter = TreePlotter(tree.get(queue=queue))
        plotter.draw_tree(fill=False, edgecolor="black")
        # plotter.draw_box_numbers()
        plotter.set_bounding_box()
        plt.gca().set_aspect("equal")

        plt.draw()
        plt.show()
        # plt.savefig("tree.png")

    # Direct p2p

    if 0:
        print("Performing P2P")
        pot_direct, = drive_volume_fmm(trav,
                                       wrangler,
                                       source_vals * q_weights,
                                       source_vals,
                                       direct_evaluation=True)
        zds = pot_direct.get()
        zs = pot.get()

        print("P2P-FMM diff =", np.max(np.abs(zs - zds)))

        print("P2P Error =", np.max(np.abs(ze - zds)))

    # Write vtk
    if 0:
        from meshmode.mesh.io import read_gmsh

        modemesh = read_gmsh("box_grid.msh", force_ambient_dim=None)
        from meshmode.discretization.poly_element import (
            LegendreGaussLobattoTensorProductGroupFactory, )
        from meshmode.array_context import PyOpenCLArrayContext
        from meshmode.discretization import Discretization

        actx = PyOpenCLArrayContext(queue)
        box_discr = Discretization(
            actx, modemesh,
            LegendreGaussLobattoTensorProductGroupFactory(q_order))

        box_nodes_x = box_discr.nodes()[0].with_queue(queue).get()
        box_nodes_y = box_discr.nodes()[1].with_queue(queue).get()
        box_nodes_z = box_discr.nodes()[2].with_queue(queue).get()
        box_nodes = make_obj_array(
            # get() first for CL compatibility issues
            [
                cl.array.to_device(queue, box_nodes_x),
                cl.array.to_device(queue, box_nodes_y),
                cl.array.to_device(queue, box_nodes_z),
            ])

        visual_order = 1
        from meshmode.discretization.visualization import make_visualizer

        vis = make_visualizer(queue, box_discr, visual_order)

        from volumential.volume_fmm import interpolate_volume_potential

        volume_potential = interpolate_volume_potential(
            box_nodes, trav, wrangler, pot)

        # qx = q_points[0].get()
        # qy = q_points[1].get()
        # qz = q_points[2].get()
        exact_solution = cl.array.to_device(
            queue,
            solu_eval(queue, np.array([box_nodes_x, box_nodes_y,
                                       box_nodes_z])))

        # clean up the mess
        def clean_file(filename):
            import os

            try:
                os.remove(filename)
            except OSError:
                pass

        vtu_filename = "laplace3d.vtu"
        clean_file(vtu_filename)
        vis.write_vtk_file(
            vtu_filename,
            [
                ("VolPot", volume_potential),
                # ("SrcDensity", source_density),
                ("ExactSol", exact_solution),
                ("Error", volume_potential - exact_solution),
            ],
        )
        print("Written file " + vtu_filename)

    # }}} End postprocess and plot
Пример #19
0
def plot_traversal(ctx_getter, do_plot=False):
    """Plot a 2D tree and highlight a few randomly chosen interaction lists.

    For a handful of randomly picked boxes, the box itself is drawn in red
    and the members of one of its traversal lists in yellow. Which list is
    shown is selected by the hard-wired ``if 0 / elif 1`` chain near the end;
    as written, the "separated smaller (list 3)" branch is the active one.

    :arg ctx_getter: callable returning the OpenCL context to run on.
    :arg do_plot: not used by the current body.

    NOTE(review): the results of the tree and traversal builders are used
    directly here, whereas other snippets in this file unpack them as
    ``tree, _ = tb(...)`` / ``trav, _ = tg(...)`` -- confirm which builder
    API this function is meant to run against.
    """
    from pyopencl.clrandom import PhiloxGenerator
    from pytools.obj_array import make_obj_array
    from boxtree import TreeBuilder
    from boxtree.traversal import FMMTraversalBuilder
    from boxtree.visualization import TreePlotter
    from random import randrange, seed

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    #for dims in [2, 3]:
    for dims in [2]:
        nparticles = 10**4
        dtype = np.float64

        rng = PhiloxGenerator(queue.context, seed=15)

        # one array of normally distributed coordinates per axis
        coordinate_arrays = [
            rng.normal(queue, nparticles, dtype=dtype) for _ in range(dims)]
        particles = make_obj_array(coordinate_arrays)

        # if do_plot:
        #     pt.plot(particles[0].get(), particles[1].get(), "x")

        tree_builder = TreeBuilder(ctx)

        queue.finish()
        tree = tree_builder(
            queue, particles, max_particles_in_box=30, debug=True)

        trav_builder = FMMTraversalBuilder(ctx)
        trav = trav_builder(queue, tree).get()

        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        #plotter.draw_box_numbers()
        plotter.set_bounding_box()

        # fixed seed so the same boxes are picked on every run
        seed(7)

        # {{{ generic box drawing helper

        def draw_some_box_lists(starts, lists, key_to_box=None, count=5):
            # Keep drawing random keys until `count` non-empty lists have
            # been shown. Without `key_to_box`, the key is the box number
            # itself; otherwise the key indexes `key_to_box` to find the box.
            drawn = 0
            while drawn < count:
                if key_to_box is None:
                    key = ibox = randrange(tree.nboxes)
                else:
                    key = randrange(len(key_to_box))
                    ibox = key_to_box[key]

                start, end = starts[key:key + 2]
                if start != end:
                    #print ibox, start, end, lists[start:end]
                    for jbox in lists[start:end]:
                        plotter.draw_box(jbox, facecolor='yellow')

                    plotter.draw_box(ibox, facecolor='red')

                    drawn += 1

        # }}}

        # Manual switchboard: the first branch with a true condition wins.
        if 0:
            # colleagues
            draw_some_box_lists(trav.colleagues_starts, trav.colleagues_lists)
        elif 0:
            # near neighbors ("list 1")
            draw_some_box_lists(trav.neighbor_leaves_starts,
                                trav.neighbor_leaves_lists,
                                key_to_box=trav.source_boxes)
        elif 0:
            # well-separated siblings (list 2)
            draw_some_box_lists(trav.sep_siblings_starts,
                                trav.sep_siblings_lists)
        elif 1:
            # separated smaller (list 3)
            draw_some_box_lists(trav.sep_smaller_starts,
                                trav.sep_smaller_lists,
                                key_to_box=trav.source_boxes)
        elif 1:
            # separated bigger (list 4)
            # (not reached while the branch above is enabled)
            draw_some_box_lists(trav.sep_bigger_starts, trav.sep_bigger_lists)

        import matplotlib.pyplot as pt
        pt.show()
Пример #20
0
def laplace_problem(ctx_factory):
    """Assemble a 2D Laplace volume-potential problem for an FMM driver.

    The source density is ``-laplace(u)`` for the reference solution
    ``u(x, y) = exp(-alpha * (x**2 + y**2))``. Quadrature points come from a
    ``MeshGen2D`` mesh on ``[a, b]^2`` that is refined until it has at least
    ``refined_n_cells`` active cells or the loop limit is hit. The points are
    used as both sources and targets of a level-restricted adaptive tree.

    :arg ctx_factory: callable returning an OpenCL context.
    :returns: a tuple ``(trav, wrangler, source_vals, q_weights)`` holding the
        traversal, the expansion wrangler, the source density at the
        quadrature points (device array) and the quadrature weights (device
        array).
    """
    import os
    from contextlib import suppress

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    dim = 2
    dtype = np.float64

    q_order = 2  # quadrature order
    n_levels = 3  # 2^(n_levels-1) subintervals in 1D

    n_refinement_loops = 100
    refined_n_cells = 1000
    rratio_top = 0.2
    rratio_bot = 0.5

    # bounding box
    a = -1.
    b = 1.

    m_order = 15  # multipole order

    alpha = 160 / np.sqrt(2)

    def source_field(x):
        # Negative Laplacian of exp(-alpha * |x|^2) in two dimensions.
        assert len(x) == dim
        assert dim == 2
        norm2 = x[0] ** 2 + x[1] ** 2
        lap_u = (4 * alpha ** 2 * norm2 - 4 * alpha) * np.exp(-alpha * norm2)
        return -lap_u

    # {{{ generate quad points

    mesh = mg.MeshGen2D(q_order, n_levels, a, b)
    iloop = 0
    while mesh.n_active_cells() < refined_n_cells:
        iloop += 1
        # Refinement criterion per cell: |source at cell center| * cell measure.
        crtr = np.array(
            [
                np.abs(source_field(c) * m)
                for (c, m) in zip(mesh.get_cell_centers(), mesh.get_cell_measures())
            ]
        )
        mesh.update_mesh(crtr, rratio_top, rratio_bot)
        if iloop > n_refinement_loops:
            print("Max number of refinement loops reached.")
            break

    q_points = mesh.get_q_points()
    q_weights = mesh.get_q_weights()

    assert len(q_points) == len(q_weights)
    assert q_points.shape[1] == dim

    # Keep the point-major host copy for evaluating the source field below;
    # the device copy is stored one contiguous array per axis.
    q_points_org = q_points
    q_points = np.ascontiguousarray(np.transpose(q_points))

    from pytools.obj_array import make_obj_array

    q_points = make_obj_array(
        [cl.array.to_device(queue, q_points[i]) for i in range(dim)]
    )

    q_weights = cl.array.to_device(queue, q_weights)

    # }}}

    # {{{ discretize the source field

    source_vals = cl.array.to_device(
        queue, np.array([source_field(qp) for qp in q_points_org])
    )

    # }}} End discretize the source field

    # {{{ build tree and traversals

    from boxtree.tools import AXIS_NAMES

    axis_names = AXIS_NAMES[:dim]

    from pytools import single_valued

    coord_dtype = single_valued(coord.dtype for coord in q_points)
    from boxtree.bounding_box import make_bounding_box_dtype

    bbox_type, _ = make_bounding_box_dtype(ctx.devices[0], dim, coord_dtype)

    bbox = np.empty(1, bbox_type)
    for ax in axis_names:
        bbox["min_" + ax] = a
        bbox["max_" + ax] = b

    # tune max_particles_in_box to reconstruct the mesh
    from boxtree import TreeBuilder

    tb = TreeBuilder(ctx)
    tree, _ = tb(
        queue,
        particles=q_points,
        targets=q_points,
        bbox=bbox,
        max_particles_in_box=q_order ** 2 * 4 - 1,
        kind="adaptive-level-restricted",
    )

    from boxtree.traversal import FMMTraversalBuilder

    tg = FMMTraversalBuilder(ctx)
    trav, _ = tg(queue, tree)

    # }}} End build tree and traversals

    # {{{ build near field potential table

    from volumential.table_manager import NearFieldInteractionTableManager

    table_filename = "nft-test-volume-fmm.hdf5"
    # Remove any table file left behind by an earlier run so the manager
    # starts from a fresh file. A missing file is not an error.
    with suppress(FileNotFoundError):
        os.remove(table_filename)
    tm = NearFieldInteractionTableManager(table_filename)
    nftable, _ = tm.get_table(dim, "Laplace", q_order)

    # }}} End build near field potential table

    # {{{ sumpy expansion for laplace kernel

    from sumpy.kernel import LaplaceKernel

    from sumpy.expansion.multipole import (
        LaplaceConformingVolumeTaylorMultipoleExpansion,
    )
    from sumpy.expansion.local import LaplaceConformingVolumeTaylorLocalExpansion

    knl = LaplaceKernel(dim)
    out_kernels = [knl]
    local_expn_class = LaplaceConformingVolumeTaylorLocalExpansion
    mpole_expn_class = LaplaceConformingVolumeTaylorMultipoleExpansion

    exclude_self = True
    from volumential.expansion_wrangler_fpnd import (
            FPNDExpansionWranglerCodeContainer,
            FPNDExpansionWrangler)

    wcc = FPNDExpansionWranglerCodeContainer(
        ctx,
        partial(mpole_expn_class, knl),
        partial(local_expn_class, knl),
        out_kernels,
        exclude_self=exclude_self,
    )

    if exclude_self:
        # Sources and targets are the same points, so target i maps to source i.
        target_to_source = np.arange(tree.ntargets, dtype=np.int32)
        self_extra_kwargs = {"target_to_source": target_to_source}
    else:
        self_extra_kwargs = {}

    wrangler = FPNDExpansionWrangler(
        code_container=wcc,
        queue=queue,
        tree=tree,
        near_field_table=nftable,
        dtype=dtype,
        fmm_level_to_order=lambda kernel, kernel_args, tree, lev: m_order,
        quad_order=q_order,
        self_extra_kwargs=self_extra_kwargs,
    )

    # }}} End sumpy expansion for laplace kernel

    return trav, wrangler, source_vals, q_weights
Пример #21
0
def demo_cost_model():
    """Calibrate the FMM cost model on small runs and test it on a larger one.

    For each problem size an FMM is run with timing collection enabled. All
    runs except the last are used to estimate calibration parameters; the
    per-stage cost predicted for the last run is then compared with its
    measured process time and the relative error is logged.

    :raises NotImplementedError: if process time is not available.
    """
    if not SUPPORTS_PROCESS_TIME:
        raise NotImplementedError(
            "Currently this script uses process time which only works on Python>=3.3"
        )

    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler

    nsources_list = [1000, 2000, 3000, 4000, 5000]
    ntargets_list = [1000, 2000, 3000, 4000, 5000]
    dims = 3
    dtype = np.float64

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    traversals_dev = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        return 10

    for nsources, ntargets in zip(nsources_list, ntargets_list):
        # {{{ Generate sources, targets and target_radii

        from boxtree.tools import make_normal_particle_array as p_normal
        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        from pyopencl.clrandom import PhiloxGenerator
        rng = PhiloxGenerator(queue.context, seed=22)
        target_radii = rng.uniform(queue, ntargets, a=0, b=0.05,
                                   dtype=dtype).get()

        # }}}

        # {{{ Generate tree and traversal

        from boxtree import TreeBuilder
        tb = TreeBuilder(ctx)
        tree, _ = tb(queue,
                     sources,
                     targets=targets,
                     target_radii=target_radii,
                     stick_out_factor=0.15,
                     max_particles_in_box=30,
                     debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
        trav_dev, _ = tg(queue, tree, debug=True)
        # The wrangler and drive_fmm below work on the host copy; the device
        # copy is kept for the cost model.
        trav = trav_dev.get(queue=queue)

        traversals_dev.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        timing_data = {}
        from boxtree.fmm import drive_fmm
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, src_weights, timing_data=timing_data)

        timing_results.append(timing_data)

    # Timing field used both for calibration and for the final comparison.
    time_field_name = "process_elapsed"

    from boxtree.cost import FMMCostModel
    from boxtree.cost import make_pde_aware_translation_cost_model
    cost_model = FMMCostModel(make_pde_aware_translation_cost_model)

    # Model every case but the last with unit calibration parameters; the
    # last case is held out for the prediction below.
    model_results = []
    for traversal, level_to_order in zip(traversals_dev[:-1],
                                         level_to_orders[:-1]):
        model_results.append(
            cost_model.cost_per_stage(
                queue,
                traversal,
                level_to_order,
                FMMCostModel.get_unit_calibration_params(),
            ))
    queue.finish()

    params = cost_model.estimate_calibration_params(
        model_results, timing_results[:-1], time_field_name=time_field_name)

    predicted_time = cost_model.cost_per_stage(
        queue,
        traversals_dev[-1],
        level_to_orders[-1],
        params,
    )
    queue.finish()

    for field in [
            "form_multipoles", "eval_direct", "multipole_to_local",
            "eval_multipoles", "form_locals", "eval_locals",
            "coarsen_multipoles", "refine_locals"
    ]:
        measured = timing_results[-1][field][time_field_name]
        predicted = predicted_time[field]
        if measured:
            pred_err = ((measured - predicted) / measured)
        else:
            # A stage too short to register any process time has no
            # meaningful relative error.
            pred_err = float("nan")
        logger.info("actual/predicted time for %s: %.3g/%.3g -> %g %% error",
                    field, measured, predicted,
                    abs(100 * pred_err))
Пример #22
0
 def build_traversal(self):
     """Return an ``FMMTraversalBuilder`` constructed from ``self.cl_context``."""
     from boxtree.traversal import FMMTraversalBuilder as builder_cls

     context = self.cl_context
     return builder_cls(context)
Пример #23
0
def test_compare_cl_and_py_cost_model(ctx_factory, nsources, ntargets, dims, dtype):
    """Check that the OpenCL and Python cost models agree stage by stage.

    A tree and traversal are built from normally distributed particles. For
    each FMM stage the per-level translation costs are evaluated with every
    calibration parameter set to its unit value and ``p_fmm_lev*`` set to 10,
    then fed to both ``FMMCostModel`` (device traversal) and
    ``_PythonFMMCostModel`` (host traversal). Both results must be equal, and
    the time each implementation took is logged.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # {{{ Generate sources, targets and target_radii

    from boxtree.tools import make_normal_particle_array as p_normal
    from pyopencl.clrandom import PhiloxGenerator

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=18)

    radii_dev = PhiloxGenerator(queue.context, seed=22).uniform(
        queue, ntargets, a=0, b=0.05, dtype=dtype
    )
    target_radii = radii_dev.get()

    # }}}

    # {{{ Generate tree and traversal

    from boxtree import TreeBuilder
    from boxtree.traversal import FMMTraversalBuilder

    build_tree = TreeBuilder(ctx)
    tree, _ = build_tree(
        queue, sources, targets=targets, target_radii=target_radii,
        stick_out_factor=0.15, max_particles_in_box=30, debug=True
    )

    build_trav = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
    trav_dev, _ = build_trav(queue, tree, debug=True)
    trav = trav_dev.get(queue=queue)

    # }}}

    # {{{ Construct cost models

    cl_model = FMMCostModel(None)
    py_model = _PythonFMMCostModel(None)

    nlevels = trav.tree.nlevels

    unit_params = cl_model.get_unit_calibration_params().copy()
    for lev in range(nlevels):
        unit_params["p_fmm_lev%d" % lev] = 10

    xlat_cost = make_pde_aware_translation_cost_model(dims, nlevels)

    # }}}

    # {{{ Local helpers

    def level_costs(cost_at_level, nentries):
        # Evaluate the symbolic cost for levels 0..nentries-1 into a float64
        # array, using the calibration parameters chosen above.
        costs = np.zeros(nentries, dtype=np.float64)
        for lev in range(nentries):
            costs[lev] = evaluate(cost_at_level(lev), context=unit_params)
        return costs

    def log_elapsed(template, since):
        # Log the wall-clock time elapsed since *since*.
        logger.info(template.format(str(time.time() - since)))

    # }}}

    # {{{ Test process_form_multipoles

    p2m_cost = level_costs(xlat_cost.p2m, nlevels)
    p2m_cost_dev = cl.array.to_device(queue, p2m_cost)

    queue.finish()
    tic = time.time()
    cl_form_multipoles = cl_model.process_form_multipoles(
        queue, trav_dev, p2m_cost_dev
    )
    queue.finish()
    log_elapsed("OpenCL time for process_form_multipoles: {0}", tic)

    tic = time.time()
    py_form_multipoles = py_model.process_form_multipoles(
        queue, trav, p2m_cost
    )
    log_elapsed("Python time for process_form_multipoles: {0}", tic)

    assert np.array_equal(cl_form_multipoles.get(), py_form_multipoles)

    # }}}

    # {{{ Test process_coarsen_multipoles

    # Entry i is the cost of translating from level i + 1 up to level i.
    m2m_cost = level_costs(
        lambda target_level: xlat_cost.m2m(target_level + 1, target_level),
        nlevels - 1,
    )
    m2m_cost_dev = cl.array.to_device(queue, m2m_cost)

    queue.finish()
    tic = time.time()
    cl_coarsen_multipoles = cl_model.process_coarsen_multipoles(
        queue, trav_dev, m2m_cost_dev
    )
    queue.finish()
    log_elapsed("OpenCL time for coarsen_multipoles: {0}", tic)

    tic = time.time()
    py_coarsen_multipoles = py_model.process_coarsen_multipoles(
        queue, trav, m2m_cost
    )
    log_elapsed("Python time for coarsen_multipoles: {0}", tic)

    assert cl_coarsen_multipoles == py_coarsen_multipoles

    # }}}

    # {{{ Test process_direct

    queue.finish()
    tic = time.time()
    cl_ndirect = cl_model.get_ndirect_sources_per_target_box(queue, trav_dev)
    cl_direct = cl_model.process_direct(queue, trav_dev, cl_ndirect, 5.0)
    queue.finish()
    log_elapsed("OpenCL time for process_direct: {0}", tic)

    tic = time.time()
    py_ndirect = py_model.get_ndirect_sources_per_target_box(queue, trav)
    py_direct = py_model.process_direct(queue, trav, py_ndirect, 5.0)
    log_elapsed("Python time for process_direct: {0}", tic)

    assert np.array_equal(cl_direct.get(), py_direct)

    # }}}

    # {{{ Test aggregate_over_boxes

    tic = time.time()
    cl_direct_total = cl_model.aggregate_over_boxes(cl_direct)
    queue.finish()
    log_elapsed("OpenCL time for aggregate_over_boxes: {0}", tic)

    tic = time.time()
    py_direct_total = py_model.aggregate_over_boxes(py_direct)
    log_elapsed("Python time for aggregate_over_boxes: {0}", tic)

    assert cl_direct_total == py_direct_total

    # }}}

    # {{{ Test process_list2

    m2l_cost = level_costs(lambda lev: xlat_cost.m2l(lev, lev), nlevels)
    m2l_cost_dev = cl.array.to_device(queue, m2l_cost)

    queue.finish()
    tic = time.time()
    cl_m2l = cl_model.process_list2(queue, trav_dev, m2l_cost_dev)
    queue.finish()
    log_elapsed("OpenCL time for process_list2: {0}", tic)

    tic = time.time()
    py_m2l = py_model.process_list2(queue, trav, m2l_cost)
    log_elapsed("Python time for process_list2: {0}", tic)

    assert np.array_equal(cl_m2l.get(), py_m2l)

    # }}}

    # {{{ Test process_list 3

    m2p_cost = level_costs(xlat_cost.m2p, nlevels)
    m2p_cost_dev = cl.array.to_device(queue, m2p_cost)

    queue.finish()
    tic = time.time()
    cl_m2p = cl_model.process_list3(queue, trav_dev, m2p_cost_dev)
    queue.finish()
    log_elapsed("OpenCL time for process_list3: {0}", tic)

    tic = time.time()
    py_m2p = py_model.process_list3(queue, trav, m2p_cost)
    log_elapsed("Python time for process_list3: {0}", tic)

    assert np.array_equal(cl_m2p.get(), py_m2p)

    # }}}

    # {{{ Test process_list4

    p2l_cost = level_costs(xlat_cost.p2l, nlevels)
    p2l_cost_dev = cl.array.to_device(queue, p2l_cost)

    queue.finish()
    tic = time.time()
    cl_p2l = cl_model.process_list4(queue, trav_dev, p2l_cost_dev)
    queue.finish()
    log_elapsed("OpenCL time for process_list4: {0}", tic)

    tic = time.time()
    py_p2l = py_model.process_list4(queue, trav, p2l_cost)
    log_elapsed("Python time for process_list4: {0}", tic)

    assert np.array_equal(cl_p2l.get(), py_p2l)

    # }}}

    # {{{ Test process_refine_locals

    # Entry i is the cost of translating from level i down to level i + 1.
    l2l_cost = level_costs(
        lambda lev: xlat_cost.l2l(lev, lev + 1),
        nlevels - 1,
    )
    l2l_cost_dev = cl.array.to_device(queue, l2l_cost)

    queue.finish()
    tic = time.time()
    cl_refine_locals = cl_model.process_refine_locals(
        queue, trav_dev, l2l_cost_dev
    )
    queue.finish()
    log_elapsed("OpenCL time for refine_locals: {0}", tic)

    tic = time.time()
    py_refine_locals = py_model.process_refine_locals(
        queue, trav, l2l_cost
    )
    log_elapsed("Python time for refine_locals: {0}", tic)

    assert cl_refine_locals == py_refine_locals

    # }}}

    # {{{ Test process_eval_locals

    l2p_cost = level_costs(xlat_cost.l2p, nlevels)
    l2p_cost_dev = cl.array.to_device(queue, l2p_cost)

    queue.finish()
    tic = time.time()
    cl_l2p = cl_model.process_eval_locals(queue, trav_dev, l2p_cost_dev)
    queue.finish()
    log_elapsed("OpenCL time for process_eval_locals: {0}", tic)

    tic = time.time()
    py_l2p = py_model.process_eval_locals(queue, trav, l2p_cost)
    log_elapsed("Python time for process_eval_locals: {0}", tic)

    assert np.array_equal(cl_l2p.get(), py_l2p)
Пример #24
0
def test_sumpy_fmm_exclude_self(ctx_getter):
    """Compare a self-excluding sumpy FMM against direct P2P evaluation.

    Sources double as targets. The potential from a 2D Laplace FMM built with
    ``exclude_self=True`` must match the P2P reference, with a relative
    l2 error within ``1e-7`` of zero.
    """
    logging.basicConfig(level=logging.INFO)

    from functools import partial

    from boxtree import TreeBuilder
    from boxtree.fmm import drive_fmm
    from boxtree.tools import make_normal_particle_array
    from boxtree.traversal import FMMTraversalBuilder
    from pyopencl.clrandom import PhiloxGenerator
    from sumpy import P2P
    from sumpy.fmm import SumpyExpansionWranglerCodeContainer

    context = ctx_getter()
    cq = cl.CommandQueue(context)

    n_particles = 500
    real_dtype = np.float64
    fmm_order = 10

    kernel = LaplaceKernel(2)
    kernels = [kernel]
    make_mpole_expn = partial(VolumeTaylorMultipoleExpansion, kernel)
    make_local_expn = partial(VolumeTaylorLocalExpansion, kernel)

    # {{{ particles, tree and traversal

    sources = make_normal_particle_array(
        cq, n_particles, kernel.dim, real_dtype, seed=15)

    tree, _ = TreeBuilder(context)(
        cq, sources, max_particles_in_box=30, debug=True)
    trav, _ = FMMTraversalBuilder(context)(cq, tree, debug=True)

    # }}}

    weights = PhiloxGenerator(context).uniform(
        cq, n_particles, dtype=np.float64)

    # Target i coincides with source i, so the identity map tells the
    # kernels which source to leave out for each target.
    exclusion_kwargs = {
        "target_to_source": np.arange(tree.ntargets, dtype=np.int32),
    }

    # {{{ FMM evaluation

    def level_to_order(kernel, kernel_args, tree, lev):
        return fmm_order

    code_container = SumpyExpansionWranglerCodeContainer(
        context,
        make_mpole_expn,
        make_local_expn,
        kernels,
        exclude_self=True)

    wrangler = code_container.get_wrangler(
        cq,
        tree,
        real_dtype,
        fmm_level_to_order=level_to_order,
        self_extra_kwargs=exclusion_kwargs)

    (fmm_pot,) = drive_fmm(trav, wrangler, weights)

    # }}}

    # {{{ direct reference evaluation

    direct = P2P(context, kernels, exclude_self=True)
    _, (direct_pot,) = direct(
        cq, sources, sources, (weights,), **exclusion_kwargs)

    # }}}

    fmm_host = fmm_pot.get()
    direct_host = direct_pot.get()

    rel_err = la.norm(fmm_host - direct_host) / la.norm(direct_host)
    message = "order %d -> relative l2 error: %g" % (fmm_order, rel_err)
    logger.info(message)

    assert np.isclose(rel_err, 0, atol=1e-7)
Пример #25
0
def main():
    """Run the 2D Laplace volume FMM demo and print the maximum error.

    The source density is ``-laplace(u)`` for ``u = exp(-alpha * (x**2 +
    y**2))``. The script builds quadrature points on ``[a, b]^2``, a
    level-restricted adaptive tree and its traversal, and a near-field
    interaction table (downloaded when missing). It then evaluates the volume
    potential with ``drive_volume_fmm`` and prints the largest absolute
    difference between the computed potential and ``u`` at the quadrature
    points.

    The ``if 0:`` sections at the end are optional plots and cross-checks that
    are switched on by hand.
    """

    print("*************************")
    print("* Setting up...")
    print("*************************")

    dim = 2

    # download precomputation results for the 2D Laplace kernel
    download_table = True
    table_filename = "nft_laplace2d.hdf5"
    root_table_source_extent = 2

    print("Using table cache:", table_filename)

    q_order = 9  # quadrature order
    n_levels = 6  # 2^(n_levels-1) subintervals in 1D

    use_multilevel_table = False

    adaptive_mesh = False
    n_refinement_loops = 100
    refined_n_cells = 2000
    rratio_top = 0.2
    rratio_bot = 0.5

    dtype = np.float64

    m_order = 20  # multipole order
    force_direct_evaluation = False

    print("Multipole order =", m_order)

    alpha = 160

    x = pmbl.var("x")
    y = pmbl.var("y")
    expp = pmbl.var("exp")

    norm2 = x**2 + y**2
    source_expr = -(4 * alpha**2 * norm2 - 4 * alpha) * expp(-alpha * norm2)
    solu_expr = expp(-alpha * norm2)

    logger.info("Source expr: " + str(source_expr))
    logger.info("Solu expr: " + str(solu_expr))

    # bounding box
    a = -0.5
    b = 0.5

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    source_eval = Eval(dim, source_expr, [x, y])

    # {{{ generate quad points

    import volumential.meshgen as mg

    # Show meshgen info
    mg.greet()

    mesh = mg.MeshGen2D(q_order, n_levels, a, b, queue=queue)
    if not adaptive_mesh:
        mesh.print_info()
        q_points = mesh.get_q_points()
        q_weights = mesh.get_q_weights()

    else:
        iloop = -1
        while mesh.n_active_cells() < refined_n_cells:
            iloop += 1
            cell_centers = mesh.get_cell_centers()
            cell_measures = mesh.get_cell_measures()
            # source_eval is called as (queue, coords) with one row of
            # coordinates per axis, as for the quadrature points below.
            # NOTE(review): assumes get_cell_centers() yields one
            # length-dim point per cell -- confirm against meshgen.
            center_coords = np.array(
                [[center[d] for center in cell_centers] for d in range(dim)])
            crtr = np.abs(
                source_eval(queue, center_coords) * np.asarray(cell_measures))
            mesh.update_mesh(crtr, rratio_top, rratio_bot)
            if iloop > n_refinement_loops:
                print("Max number of refinement loops reached.")
                break

        mesh.print_info()
        q_points = mesh.get_q_points()
        q_weights = mesh.get_q_weights()

    assert len(q_points) == len(q_weights)
    assert q_points.shape[1] == dim

    q_points = np.ascontiguousarray(np.transpose(q_points))

    from pytools.obj_array import make_obj_array

    q_points = make_obj_array(
        [cl.array.to_device(queue, q_points[i]) for i in range(dim)])

    q_weights = cl.array.to_device(queue, q_weights)
    # q_radii = cl.array.to_device(queue, q_radii)

    # }}}

    # {{{ discretize the source field

    source_vals = cl.array.to_device(
        queue,
        source_eval(queue, np.array([coords.get() for coords in q_points])))

    # particle_weigt = source_val * q_weight

    # }}} End discretize the source field

    # {{{ build tree and traversals

    from boxtree.tools import AXIS_NAMES

    axis_names = AXIS_NAMES[:dim]

    from pytools import single_valued

    coord_dtype = single_valued(coord.dtype for coord in q_points)
    from boxtree.bounding_box import make_bounding_box_dtype

    bbox_type, _ = make_bounding_box_dtype(ctx.devices[0], dim, coord_dtype)
    bbox = np.empty(1, bbox_type)
    for ax in axis_names:
        bbox["min_" + ax] = a
        bbox["max_" + ax] = b

    # tune max_particles_in_box to reconstruct the mesh
    # TODO: use points from FieldPlotter are used as target points for better
    # visuals
    from boxtree import TreeBuilder

    tb = TreeBuilder(ctx)
    tree, _ = tb(
        queue,
        particles=q_points,
        targets=q_points,
        bbox=bbox,
        max_particles_in_box=q_order**2 * 4 - 1,
        kind="adaptive-level-restricted",
    )

    # NOTE(review): tree2 is built from a plain-array bounding box but is not
    # used anywhere below.
    bbox2 = np.array([[a, b], [a, b]])
    tree2, _ = tb(
        queue,
        particles=q_points,
        targets=q_points,
        bbox=bbox2,
        max_particles_in_box=q_order**2 * 4 - 1,
        kind="adaptive-level-restricted",
    )

    from boxtree.traversal import FMMTraversalBuilder

    tg = FMMTraversalBuilder(ctx)
    trav, _ = tg(queue, tree)

    # }}} End build tree and traversals

    # {{{ build near field potential table

    from volumential.table_manager import NearFieldInteractionTableManager
    import os

    if download_table and (not os.path.isfile(table_filename)):
        import json
        with open("table_urls.json", 'r') as fp:
            urls = json.load(fp)

        print("Downloading table from %s" % urls['Laplace2D'])
        import subprocess
        # NOTE(review): wget takes every positional argument as a URL, so
        # table_filename is not an output name here; the download is stored
        # under the URL's own file name. Verify that it matches
        # table_filename (or switch to "-O").
        subprocess.call(["wget", "-q", urls['Laplace2D'], table_filename])

    tm = NearFieldInteractionTableManager(table_filename,
                                          root_extent=root_table_source_extent,
                                          queue=queue)

    if use_multilevel_table:
        # The domain width must be a whole multiple of the root table extent.
        assert (abs(
            int((b - a) / root_table_source_extent) *
            root_table_source_extent - (b - a)) < 1e-15)
        nftable = []
        for lev in range(0, tree.nlevels + 1):
            print("Getting table at level", lev)
            level_table, _ = tm.get_table(
                dim,
                "Laplace",
                q_order,
                source_box_level=lev,
                compute_method="DrosteSum",
                queue=queue,
                n_brick_quad_points=100,
                adaptive_level=False,
                use_symmetry=True,
                alpha=0.1,
                nlevels=15,
            )
            nftable.append(level_table)

        print("Using table list of length", len(nftable))

    else:
        nftable, _ = tm.get_table(
            dim,
            "Laplace",
            q_order,
            force_recompute=False,
            compute_method="DrosteSum",
            queue=queue,
            n_brick_quad_points=100,
            adaptive_level=False,
            use_symmetry=True,
            alpha=0.1,
            nlevels=15,
        )

    # }}} End build near field potential table

    # {{{ sumpy expansion for laplace kernel

    from sumpy.expansion import DefaultExpansionFactory
    from sumpy.kernel import LaplaceKernel

    knl = LaplaceKernel(dim)
    out_kernels = [knl]

    expn_factory = DefaultExpansionFactory()
    local_expn_class = expn_factory.get_local_expansion_class(knl)
    mpole_expn_class = expn_factory.get_multipole_expansion_class(knl)

    exclude_self = True

    from volumential.expansion_wrangler_fpnd import (
        FPNDExpansionWranglerCodeContainer, FPNDExpansionWrangler)

    wcc = FPNDExpansionWranglerCodeContainer(
        ctx,
        partial(mpole_expn_class, knl),
        partial(local_expn_class, knl),
        out_kernels,
        exclude_self=exclude_self,
    )

    if exclude_self:
        # Sources and targets are the same points, so target i maps to source i.
        target_to_source = np.arange(tree.ntargets, dtype=np.int32)
        self_extra_kwargs = {"target_to_source": target_to_source}
    else:
        self_extra_kwargs = {}

    wrangler = FPNDExpansionWrangler(
        code_container=wcc,
        queue=queue,
        tree=tree,
        near_field_table=nftable,
        dtype=dtype,
        fmm_level_to_order=lambda kernel, kernel_args, tree, lev: m_order,
        quad_order=q_order,
        self_extra_kwargs=self_extra_kwargs,
    )

    # }}} End sumpy expansion for laplace kernel

    print("*************************")
    print("* Performing FMM ...")
    print("*************************")

    # {{{ conduct fmm computation

    from volumential.volume_fmm import drive_volume_fmm

    import time
    # Drain the queue on both sides so that only the FMM itself is timed.
    queue.finish()

    t0 = time.time()

    pot, = drive_volume_fmm(
        trav,
        wrangler,
        source_vals * q_weights,
        source_vals,
        direct_evaluation=force_direct_evaluation,
    )
    queue.finish()

    t1 = time.time()

    print("Finished in %.2f seconds." % (t1 - t0))
    print("(%e points per second)" % (len(q_weights) / (t1 - t0)))

    # }}} End conduct fmm computation

    print("*************************")
    print("* Postprocessing ...")
    print("*************************")

    # {{{ postprocess and plot

    # print(pot)

    solu_eval = Eval(dim, solu_expr, [x, y])

    # From here on x and y are the host coordinates of the quadrature
    # points, no longer the symbolic variables.
    x = q_points[0].get()
    y = q_points[1].get()
    ze = solu_eval(queue, np.array([x, y]))
    zs = pot.get()

    print_error = True
    if print_error:
        err = np.max(np.abs(ze - zs))
        print("Error =", err)

    # Interpolated surface
    if 0:
        h = 0.005
        out_x = np.arange(a, b + h, h)
        out_y = np.arange(a, b + h, h)
        oxx, oyy = np.meshgrid(out_x, out_y)
        out_targets = make_obj_array([
            cl.array.to_device(queue, oxx.flatten()),
            cl.array.to_device(queue, oyy.flatten()),
        ])

        from volumential.volume_fmm import interpolate_volume_potential

        # src = source_field([q.get() for q in q_points])
        # src = cl.array.to_device(queue, src)
        interp_pot = interpolate_volume_potential(out_targets, trav, wrangler,
                                                  pot)
        opot = interp_pot.get()

        import matplotlib.pyplot as plt
        from mpl_toolkits.mplot3d import Axes3D

        plt3d = plt.figure()
        ax = Axes3D(plt3d)  # noqa
        surf = ax.plot_surface(oxx, oyy, opot.reshape(oxx.shape))  # noqa
        # ax.scatter(x, y, src.get())
        # ax.set_zlim(-0.25, 0.25)

        plt.draw()
        plt.show()

    # Boxtree
    if 0:
        import matplotlib.pyplot as plt

        if dim == 2:
            # plt.plot(q_points[0].get(), q_points[1].get(), ".")
            pass

        from boxtree.visualization import TreePlotter

        plotter = TreePlotter(tree.get(queue=queue))
        plotter.draw_tree(fill=False, edgecolor="black")
        # plotter.draw_box_numbers()
        plotter.set_bounding_box()
        plt.gca().set_aspect("equal")

        plt.draw()
        # plt.show()
        plt.savefig("tree.png")

    # Direct p2p
    if 0:
        print("Performing P2P")
        pot_direct, = drive_volume_fmm(trav,
                                       wrangler,
                                       source_vals * q_weights,
                                       source_vals,
                                       direct_evaluation=True)
        zds = pot_direct.get()
        zs = pot.get()

        print("P2P-FMM diff =", np.max(np.abs(zs - zds)))

        print("P2P Error =", np.max(np.abs(ze - zds)))
        """
        import matplotlib.pyplot as plt
        import matplotlib.cm as cm
        x = q_points[0].get()
        y = q_points[1].get()
        plt.scatter(x, y, c=np.log(abs(zs-zds)) / np.log(10), cmap=cm.jet)
        plt.colorbar()

        plt.xlabel("Multipole order = " + str(m_order))

        plt.draw()
        plt.show()
        """

    # Scatter plot
    if 0:
        import matplotlib.pyplot as plt
        from mpl_toolkits.mplot3d import Axes3D

        x = q_points[0].get()
        y = q_points[1].get()
        ze = solu_eval(queue, np.array([x, y]))
        zs = pot.get()

        plt3d = plt.figure()
        ax = Axes3D(plt3d)
        ax.scatter(x, y, zs, s=1)
        # ax.scatter(x, y, source_field([q.get() for q in q_points]), s=1)
        # import matplotlib.cm as cm

        # ax.scatter(x, y, zs, c=np.log(abs(zs-zds)), cmap=cm.jet)
        # plt.gca().set_aspect("equal")

        # ax.set_xlim3d([-1, 1])
        # ax.set_ylim3d([-1, 1])
        # ax.set_zlim3d([np.min(z), np.max(z)])
        # ax.set_zlim3d([-0.002, 0.00])

        plt.draw()
        plt.show()
def drive_test_completeness(ctx, queue, dim, q_order):
    """Check the near-field table for a constant kernel and constant source.

    A uniform cubic quadrature grid is generated, a level-restricted
    adaptive tree and its FMM traversal are built on top of it, and a
    freshly computed "Constant" near-field interaction table is applied via
    direct (list 1) evaluation to a source density that is 1 everywhere.
    Every resulting value is asserted to be within ``1e-8`` of ``2**dim``
    (presumably the volume of the ``[-1, 1]**dim`` bounding box -- confirm).

    :arg ctx: OpenCL context handed to the tree, traversal, and wrangler
        builders.
    :arg queue: OpenCL command queue used for device transfers and
        evaluation.
    :arg dim: number of spatial dimensions.
    :arg q_order: quadrature order, passed as ``degree`` to the grid
        generator and as ``quad_order`` to the wrangler.
    :raises AssertionError: if the grid is inconsistent or any evaluated
        value deviates from ``2**dim`` by ``1e-8`` or more.
    """
    # Imported locally so the block does not depend on new file-level imports.
    import contextlib
    import os

    n_levels = 2  # 2^(n_levels-1) subintervals in 1D, must be at least 2

    # bounding box
    a = -1
    b = 1

    dtype = np.float64

    def source_field(x):
        # Constant unit density; only the dimension of the point is checked.
        assert len(x) == dim
        return 1

    # {{{ generate quad points

    import volumential.meshgen as mg

    q_points, q_weights, q_radii = mg.make_uniform_cubic_grid(degree=q_order,
                                                              level=n_levels,
                                                              dim=dim)

    assert len(q_points) == len(q_weights)
    assert q_points.shape[1] == dim

    # Keep the host-side (npoints, dim) layout for sampling the source field;
    # the device copy below is stored one coordinate axis per array.
    q_points_org = q_points
    q_points = np.ascontiguousarray(np.transpose(q_points))

    from pytools.obj_array import make_obj_array

    q_points = make_obj_array(
        [cl.array.to_device(queue, q_points[i]) for i in range(dim)])

    q_weights = cl.array.to_device(queue, q_weights)
    if q_radii is not None:
        q_radii = cl.array.to_device(queue, q_radii)

    # }}}

    # {{{ discretize the source field

    source_vals = cl.array.to_device(
        queue, np.array([source_field(qp) for qp in q_points_org]))

    # }}} End discretize the source field

    # {{{ build tree and traversals

    from boxtree.tools import AXIS_NAMES

    axis_names = AXIS_NAMES[:dim]

    from pytools import single_valued

    coord_dtype = single_valued(coord.dtype for coord in q_points)
    from boxtree.bounding_box import make_bounding_box_dtype

    bbox_type, _ = make_bounding_box_dtype(ctx.devices[0], dim, coord_dtype)

    # Pin the root box to [a, b] along every axis.
    bbox = np.empty(1, bbox_type)
    for ax in axis_names:
        bbox["min_" + ax] = a
        bbox["max_" + ax] = b

    # tune max_particles_in_box to reconstruct the mesh
    # TODO: use points from FieldPlotter as target points for better visuals
    from boxtree import TreeBuilder

    tb = TreeBuilder(ctx)
    tree, _ = tb(
        queue,
        particles=q_points,
        targets=q_points,
        bbox=bbox,
        max_particles_in_box=q_order**dim * (2**dim) - 1,
        kind="adaptive-level-restricted",
    )

    from boxtree.traversal import FMMTraversalBuilder

    tg = FMMTraversalBuilder(ctx)
    trav, _ = tg(queue, tree)

    # }}} End build tree and traversals

    # {{{ near-field interaction table

    from volumential.table_manager import NearFieldInteractionTableManager

    table_filename = "nft-test-completeness.hdf5"

    # Start from a clean cache file so the table is recomputed. This is done
    # in-process (instead of shelling out to ``rm -f``) so that it also works
    # on platforms without an ``rm`` executable; a missing file is not an
    # error.
    with contextlib.suppress(FileNotFoundError):
        os.remove(table_filename)

    with NearFieldInteractionTableManager(table_filename,
                                          progress_bar=False) as tm:

        nft, _ = tm.get_table(dim,
                              "Constant",
                              q_order,
                              queue=queue,
                              n_levels=1,
                              alpha=0,
                              compute_method="DrosteSum",
                              n_brick_quad_points=50,
                              adaptive_level=False,
                              use_symmetry=True)

    # }}} End near-field interaction table

    # {{{ expansion wrangler

    from sumpy.kernel import LaplaceKernel
    from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansion
    from sumpy.expansion.local import VolumeTaylorLocalExpansion

    knl = LaplaceKernel(dim)
    out_kernels = [knl]
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion

    from volumential.expansion_wrangler_fpnd import (
        FPNDExpansionWranglerCodeContainer, FPNDExpansionWrangler)

    wcc = FPNDExpansionWranglerCodeContainer(
        ctx,
        partial(mpole_expn_class, knl),
        partial(local_expn_class, knl),
        out_kernels,
        exclude_self=True,
    )

    wrangler = FPNDExpansionWrangler(
        code_container=wcc,
        queue=queue,
        tree=tree,
        near_field_table=nft,
        dtype=dtype,
        fmm_level_to_order=lambda kernel, kernel_args, tree, lev: 1,
        quad_order=q_order,
    )

    # }}} End expansion wrangler

    # Only the direct (neighbor box) interactions are evaluated; no
    # multipole/local translations take part in this check.
    pot = wrangler.eval_direct(trav.target_boxes,
                               trav.neighbor_source_boxes_starts,
                               trav.neighbor_source_boxes_lists,
                               mode_coefs=source_vals)
    # NOTE(review): the first item of the return value is taken, and then its
    # first entry is iterated -- presumably (potentials, timing) with one
    # potential per output kernel; confirm against FPNDExpansionWrangler.
    pot = pot[0]
    for p in pot[0]:
        assert abs(p - 2**dim) < 1e-8
Пример #27
0
def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away):
    """Verify the rotation classes assigned to ``from_sep_siblings`` entries.

    A tree is built over ``10**4`` normally distributed particles in three
    dimensions, followed by an FMM traversal (using the given
    *well_sep_is_n_away*) and its rotation classes. For every target (or
    target parent) box, the angle between the z-axis and each
    source-to-target translation vector is recomputed on the host and
    compared with the angle stored for the corresponding rotation class.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    :arg well_sep_is_n_away: forwarded to :class:`FMMTraversalBuilder`.
    """
    from pyopencl.clrandom import PhiloxGenerator
    from pytools.obj_array import make_obj_array
    from boxtree import TreeBuilder
    from boxtree.traversal import FMMTraversalBuilder
    from boxtree.rotation_classes import RotationClassesBuilder

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    dims = 3
    nparticles = 10**4
    dtype = np.float64

    # {{{ build tree

    rng = PhiloxGenerator(queue.context, seed=15)
    particles = make_obj_array(
        [rng.normal(queue, nparticles, dtype=dtype) for _ in range(dims)])

    tree_builder = TreeBuilder(ctx)

    queue.finish()
    dev_tree, _ = tree_builder(
        queue, particles, max_particles_in_box=30, debug=True)

    # }}}

    # {{{ build traversal

    trav_builder = FMMTraversalBuilder(
        ctx, well_sep_is_n_away=well_sep_is_n_away)
    dev_trav, _ = trav_builder(queue, dev_tree)

    rot_builder = RotationClassesBuilder(ctx)
    rot_data, _ = rot_builder(queue, dev_trav, dev_tree)

    # Fetch everything needed for the host-side comparison.
    rot_classes = rot_data.from_sep_siblings_rotation_classes.get(queue)
    rot_angles = rot_data.from_sep_siblings_rotation_class_to_angle.get(queue)

    host_tree = dev_tree.get(queue=queue)
    host_trav = dev_trav.get(queue=queue)

    # One row per box, one column per axis.
    centers = host_tree.box_centers.T

    # }}}

    # Walk over all from_sep_siblings entries: the translation direction
    # (target center minus source center) makes some angle with the z-axis,
    # and that angle must agree with the one recorded for the entry's
    # rotation class.

    last_axis = dims - 1
    sep_starts = host_trav.from_sep_siblings_starts
    sep_lists = host_trav.from_sep_siblings_lists

    for itgt_box, tgt_ibox in enumerate(
            host_trav.target_or_target_parent_boxes):
        lo, hi = sep_starts[itgt_box:itgt_box+2]
        src_boxes = sep_lists[lo:hi]
        box_rot_classes = rot_classes[lo:hi]

        displacements = centers[tgt_ibox] - centers[src_boxes]

        # Polar angle: in-plane distance against the z component.
        planar_dist = la.norm(displacements[:, :last_axis], axis=1)
        z_component = displacements[:, last_axis]
        theta = np.arctan2(planar_dist, z_component)

        expected_angles = rot_angles[box_rot_classes]

        assert np.allclose(theta, expected_angles, atol=1e-13, rtol=1e-13)