def build_traversal(self):
    """Return an ``FMMTraversalBuilder`` configured from this object.

    The builder is created on ``self.cl_context`` and receives this
    object's ``_well_sep_is_n_away`` and ``_from_sep_smaller_crit``
    settings as the keyword arguments of the same (un-prefixed) names.
    """
    # Imported lazily so boxtree is only required once a builder is requested.
    from boxtree.traversal import FMMTraversalBuilder

    context = self.cl_context
    builder_options = {
        "well_sep_is_n_away": self._well_sep_is_n_away,
        "from_sep_smaller_crit": self._from_sep_smaller_crit,
    }
    return FMMTraversalBuilder(context, **builder_options)
def __init__(self, ctx_getter=cl.create_some_context, enable_extents=False):
    """Build a tree and traversal for a large 2D point set; keep host copies.

    :arg ctx_getter: zero-argument callable returning the OpenCL context.
    :arg enable_extents: when true, random target radii are generated.
        They are not handed to the tree builder (see below).

    Sets ``self.tree`` and ``self.trav`` (host-side tree/traversal),
    ``self.input`` (``[tree, weights, sum(weights), trav]``) and
    ``self.pot`` (``None``).
    """
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.characterize import has_struct_arg_count_bug
    if has_struct_arg_count_bug(queue.device):
        pytest.xfail(
            "won't work on devices with the struct arg count issue")

    logging.basicConfig(level=logging.INFO)

    ndims = 2
    source_count = 9000000
    target_count = 9000000
    real_dtype = np.float32

    from boxtree.fmm import drive_fmm  # noqa: F401
    source_points = p_normal(queue, source_count, ndims, real_dtype, seed=15)
    # Targets (and radii, below) are generated but currently not passed on
    # to the tree build.
    target_points = p_normal(  # noqa: F841
        queue, target_count, ndims, real_dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    radius_rng = PhiloxGenerator(queue.context, seed=12)

    target_radii = (  # noqa: F841
        2**radius_rng.uniform(
            queue, target_count, dtype=real_dtype, a=-10, b=0)
        if enable_extents
        else None)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)

    # Sources-only tree. Disabled keyword arguments:
    #   targets=target_points, target_radii=target_radii,
    #   stick_out_factor=0.25
    device_tree, _ = build_tree(
        queue,
        source_points,
        max_particles_in_box=30,
        debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_trav = FMMTraversalBuilder(context)
    device_trav, _ = build_trav(queue, device_tree, debug=True)

    unit_weights = np.ones(source_count)
    total_weight = np.sum(unit_weights)

    # Transfer the traversal to the host; the host tree comes along with it.
    host_traversal = device_trav.get(queue=queue)
    host_tree = host_traversal.tree

    self.tree = host_tree
    self.trav = host_traversal
    self.input = [host_tree, unit_weights, total_weight, host_traversal]
    self.pot = None
def test_plot_traversal(ctx_factory, well_sep_is_n_away=1, plot=False):
    """Smoke-test drawing a tree and the interaction lists of one box.

    :arg ctx_factory: zero-argument callable returning an OpenCL context.
    :arg well_sep_is_n_away: forwarded to ``FMMTraversalBuilder``; also
        selects which box's lists get drawn (380 for 1, 320 for 2,
        nothing otherwise).
    :arg plot: when true, show the figure with axis ticks removed.
    """
    pytest.importorskip("matplotlib")
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    # Only the 2D case is exercised (a 3D run used to be listed here).
    for dims in (2,):
        point_count = 10**4
        real_dtype = np.float64

        from pyopencl.clrandom import PhiloxGenerator
        point_rng = PhiloxGenerator(queue.context, seed=15)

        from pytools.obj_array import make_obj_array
        coordinate_arrays = [
            point_rng.normal(queue, point_count, dtype=real_dtype)
            for _axis in range(dims)]
        particles = make_obj_array(coordinate_arrays)

        from boxtree import TreeBuilder
        build_tree = TreeBuilder(context)

        queue.finish()
        device_tree, _ = build_tree(
            queue, particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        build_trav = FMMTraversalBuilder(
            context, well_sep_is_n_away=well_sep_is_n_away)
        device_trav, _ = build_trav(queue, device_tree)

        host_tree = device_tree.get(queue=queue)
        host_trav = device_trav.get(queue=queue)

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(host_tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        plotter.set_bounding_box()

        # Seed the stdlib generator (a random box choice used to follow).
        import random
        random.seed(7)

        from boxtree.visualization import draw_box_lists

        if well_sep_is_n_away == 1:
            shown_box = 380
        elif well_sep_is_n_away == 2:
            shown_box = 320
        else:
            shown_box = None

        if shown_box is not None:
            draw_box_lists(plotter, host_trav, shown_box)

        if not plot:
            continue

        import matplotlib.pyplot as pt
        pt.gca().set_xticks([])
        pt.gca().set_yticks([])
        pt.show()
def test_interaction_list_particle_count_thresholding(ctx_getter, enable_extents):
    """Check the constant-one FMM with source-count thresholding enabled.

    A traversal is built with ``_from_sep_smaller_min_nsources_cumul`` set
    to one more than ``max_particles_in_box``; the potential computed with
    unit weights must then equal the sum of the weights at every target.

    :arg ctx_getter: zero-argument callable returning an OpenCL context.
    :arg enable_extents: when true, targets get random radii in
        ``2**[-10, 0)``.
    """
    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    logging.basicConfig(level=logging.INFO)

    dims = 2
    nsources = 1000
    ntargets = 1000
    # ``np.float`` was only an alias of the builtin ``float`` (deprecated in
    # NumPy 1.20, removed in 1.24); ``np.float64`` is the dtype it meant.
    dtype = np.float64

    max_particles_in_box = 30
    # Ensure that we have underfilled boxes.
    from_sep_smaller_min_nsources_cumul = 1 + max_particles_in_box

    from boxtree.fmm import drive_fmm
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)

    if enable_extents:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=max_particles_in_box,
            target_radii=target_radii,
            debug=True, stick_out_factor=0.25)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(
        queue, tree, debug=True,
        _from_sep_smaller_min_nsources_cumul=from_sep_smaller_min_nsources_cumul
    )

    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    wrangler = ConstantOneExpansionWrangler(host_tree)

    pot = drive_fmm(host_trav, wrangler, weights)

    # Every target must have seen every source exactly once.
    assert (pot == weights_sum).all()
def test_fmm_float32(ctx_getter=cl.create_some_context, enable_extents=True):
    """Run the constant-one FMM on 3M sources/targets in single precision.

    Prints the wall-clock seconds spent in ``drive_fmm`` and asserts that
    every potential equals the sum of the (unit) weights.

    :arg ctx_getter: zero-argument callable returning an OpenCL context.
    :arg enable_extents: when true, targets get random radii in
        ``2**[-10, 0)``.
    """
    from time import time

    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.characterize import has_struct_arg_count_bug
    if has_struct_arg_count_bug(queue.device):
        pytest.xfail("won't work on devices with the struct arg count issue")

    logging.basicConfig(level=logging.INFO)

    ndims = 2
    source_count = 3000000
    target_count = 3000000
    real_dtype = np.float32

    from boxtree.fmm import drive_fmm
    source_points = p_normal(queue, source_count, ndims, real_dtype, seed=15)
    target_points = p_normal(queue, target_count, ndims, real_dtype, seed=15)

    from pyopencl.clrandom import PhiloxGenerator
    radius_rng = PhiloxGenerator(queue.context, seed=12)

    target_radii = (
        2**radius_rng.uniform(
            queue, target_count, dtype=real_dtype, a=-10, b=0)
        if enable_extents
        else None)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    device_tree, _ = build_tree(
        queue,
        source_points,
        targets=target_points,
        max_particles_in_box=30,
        target_radii=target_radii,
        stick_out_factor=0.25,
        debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_trav = FMMTraversalBuilder(context)
    device_trav, _ = build_trav(queue, device_tree, debug=True)

    unit_weights = np.ones(source_count)
    expected_potential = np.sum(unit_weights)

    host_trav = device_trav.get(queue=queue)
    wrangler = ConstantOneExpansionWrangler(host_trav.tree)

    # Time only the FMM evaluation itself.
    started_at = time()
    potentials = drive_fmm(host_trav, wrangler, unit_weights)
    print(time() - started_at)

    assert (potentials == expected_potential).all()
def test_sumpy_fmm_timing_data_collection(ctx_factory):
    """Check that ``drive_fmm`` fills ``timing_data`` for a sumpy wrangler.

    Uses a 2D Laplace kernel with order-1 volume Taylor expansions on 500
    sources and a profiling-enabled command queue; the test passes when the
    ``timing_data`` dict is non-empty afterwards.

    :arg ctx_factory: zero-argument callable returning an OpenCL context.
    """
    logging.basicConfig(level=logging.INFO)

    context = ctx_factory()
    profiling = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(context, properties=profiling)

    source_count = 500
    real_dtype = np.float64

    from boxtree.tools import make_normal_particle_array as p_normal

    kernel = LaplaceKernel(2)
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion
    order = 1

    source_points = p_normal(
        queue, source_count, kernel.dim, real_dtype, seed=15)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    tree, _ = build_tree(
        queue, source_points, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_trav = FMMTraversalBuilder(context)
    trav, _ = build_trav(queue, tree, debug=True)

    from pyopencl.clrandom import PhiloxGenerator
    weight_rng = PhiloxGenerator(context)
    weights = weight_rng.uniform(queue, source_count, dtype=np.float64)

    out_kernels = [kernel]

    from functools import partial

    from sumpy.fmm import SumpyExpansionWranglerCodeContainer
    code_container = SumpyExpansionWranglerCodeContainer(
        context,
        partial(mpole_expn_class, kernel),
        partial(local_expn_class, kernel),
        out_kernels)

    def constant_order(kernel, kernel_args, tree, lev):
        # Same expansion order on every level.
        return order

    wrangler = code_container.get_wrangler(
        queue, tree, real_dtype, fmm_level_to_order=constant_order)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    (pot,) = drive_fmm(trav, wrangler, (weights,), timing_data=timing_data)
    print(timing_data)
    assert timing_data
def __call__(self, queue=None):
    """Build the tree and traversal over the quadrature points.

    :arg queue: command queue to use; a new one on ``self.cl_context`` is
        created when omitted.
    :returns: a ``BoxFMMGeometryData`` holding the context, the quadrature
        points and weights, the tree and the traversal.
    """
    if queue is None:
        queue = cl.CommandQueue(self.cl_context)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(self.cl_context)

    # The quadrature points act as both sources and targets.
    q_points = self._get_q_points(queue)

    bounding_box = self._bbox
    # One less than the number of quadrature points in 2**dim cells.
    points_per_cell = self.n_q_points_per_cell**self.dim
    box_capacity = points_per_cell * (2**self.dim) - 1

    tree, _ = build_tree(
        queue,
        particles=q_points,
        targets=q_points,
        bbox=bounding_box,
        max_particles_in_box=box_capacity,
        kind="adaptive-level-restricted")

    from boxtree.traversal import FMMTraversalBuilder
    build_trav = FMMTraversalBuilder(self.cl_context)
    trav, _ = build_trav(queue, tree)

    context = self.cl_context
    q_weights = self._get_q_weights(queue)
    return BoxFMMGeometryData(context, q_points, q_weights, tree, trav)
def test_pyfmmlib_fmm(ctx_getter):
    """Compare the pyfmmlib-backed 2D Helmholtz FMM with direct evaluation.

    3000 sources and 1000 targets (shifted by ``[2, 0]``) are drawn from a
    normal distribution. The FMM potential for random uniform charges must
    agree with ``hpotgrad2dall_vec`` to a relative l2 error below ``1e-5``.

    :arg ctx_getter: zero-argument callable returning an OpenCL context.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 3000
    ntargets = 1000
    dims = 2
    dtype = np.float64

    helmholtz_k = 2

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = (
            p_normal(queue, ntargets, dims, dtype, seed=18)
            + np.array([2, 0]))

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    # RanluxGenerator is deprecated in PyOpenCL; use the Philox generator
    # like the other tests in this file. Any uniform charges work here.
    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()
    #weights = np.ones(nsources)

    logger.info("computing direct (reference) result")
    from pyfmmlib import hpotgrad2dall_vec
    ref_pot, _, _ = hpotgrad2dall_vec(ifgrad=False, ifhess=False,
            sources=sources_host.T, charge=weights,
            targets=targets_host.T, zk=helmholtz_k)

    from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler
    wrangler = Helmholtz2DExpansionWrangler(trav.tree, helmholtz_k, nterms=10)

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(trav, wrangler, weights)

    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
    logger.info("relative l2 error: %g", rel_err)
    # Report the measured error when the bound is violated.
    assert rel_err < 1e-5, rel_err
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
         who_has_extent, source_gen, target_gen, filter_kind, well_sep_is_n_away,
         extent_norm, from_sep_smaller_crit):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    With unit source weights and a constant-one wrangler, every (retained)
    target potential must equal the sum of the weights.

    :arg ctx_getter: zero-argument callable returning an OpenCL context.
    :arg dims: dimension passed to the particle generators.
    :arg nsources_req: requested number of sources (the generator's actual
        count is read back from the returned array).
    :arg ntargets_req: requested number of targets, or *None* to use the
        sources as targets.
    :arg who_has_extent: string; sources get radii if it contains ``"s"``,
        targets if it contains ``"t"``. If non-empty, close lists are merged.
    :arg source_gen: callable ``(queue, n, dims, dtype, seed=...)``
        producing the sources.
    :arg target_gen: like *source_gen*, for the targets.
    :arg filter_kind: falsy for no target filtering, or ``"user"`` /
        ``"tree"`` to filter targets in user/tree order with random flags.
    :arg well_sep_is_n_away: forwarded to ``FMMTraversalBuilder``.
    :arg extent_norm: forwarded to the tree builder.
    :arg from_sep_smaller_crit: forwarded to ``FMMTraversalBuilder``.
    :raises ValueError: for any other truthy *filter_kind*.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                "generation")

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)
    # Radii are 2**u with u uniform in [-10, 0).
    if sources_have_extent:
        source_radii = 2**rng.uniform(queue, nsources, dtype=dtype,
                a=-10, b=0)
    else:
        source_radii = None

    # NOTE(review): ntargets is None when ntargets_req is None, so this
    # branch would pass a None shape -- confirm the parametrization never
    # combines "t" in who_has_extent with ntargets_req=None.
    if targets_have_extent:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype,
                a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30,
            source_radii=source_radii, target_radii=target_radii,
            debug=True, stick_out_factor=0.25, extent_norm=extent_norm)
    # Debug toggle: plot the tree.
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx,
            well_sep_is_n_away=well_sep_is_n_away,
            from_sep_smaller_crit=from_sep_smaller_crit)
    trav, _ = tbuild(queue, tree, debug=True)

    if who_has_extent:
        # Keep the unmerged traversal around for the debug plots below.
        pre_merge_trav = trav
        trav = trav.merge_close_lists(queue)

    #weights = np.random.randn(nsources)
    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if who_has_extent:
        pre_merge_host_trav = pre_merge_trav.get(queue=queue)

    from boxtree.tree import ParticleListFilter
    plfilt = ParticleListFilter(ctx)

    if filter_kind:
        # Random 0/1 flag per target (per source when targets are the sources).
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
                .astype(np.int8)
        if filter_kind == "user":
            filtered_targets = plfilt.filter_target_lists_in_user_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                    host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            filtered_targets = plfilt.filter_target_lists_in_tree_order(
                    queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                    host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)
        # All-ones flags so the debug code below can index with them.
        flags = cl.array.empty(queue, ntargets or nsources, dtype=np.int8)
        flags.fill(1)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
                wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    if filter_kind:
        # Only compare the targets that were kept by the filter.
        pot = pot[flags.get() > 0]

    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    # {{{ build, evaluate matrix (and identify incorrect interactions)

    # Debug toggle (disabled): evaluate the FMM column by column using unit
    # vectors and plot the boxes of the incorrect entries.
    if 0 and not good:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 0:
            pt.imshow(mat)
            pt.colorbar()
            pt.show()

        incorrect_tgts, incorrect_srcs = np.where(mat != 1)

        if 1 and len(incorrect_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_incorrect_tgts = \
                    host_tree.indices_to_tree_target_order(incorrect_tgts)
            tree_order_incorrect_srcs = \
                    host_tree.indices_to_tree_source_order(incorrect_srcs)

            src_boxes = [
                    host_tree.find_box_nr_for_source(i)
                    for i in tree_order_incorrect_srcs
                    ]
            tgt_boxes = [
                    host_tree.find_box_nr_for_target(i)
                    for i in tree_order_incorrect_tgts
                    ]
            print(src_boxes)
            print(tgt_boxes)

            # plot all sources/targets
            if 0:
                pt.plot(
                        host_tree.targets[0],
                        host_tree.targets[1],
                        "v", alpha=0.9)
                pt.plot(
                        host_tree.sources[0],
                        host_tree.sources[1],
                        "gx", alpha=0.9)

            # plot offending sources/targets
            if 0:
                pt.plot(
                        host_tree.targets[0][tree_order_incorrect_tgts],
                        host_tree.targets[1][tree_order_incorrect_tgts],
                        "rv")
                pt.plot(
                        host_tree.sources[0][tree_order_incorrect_srcs],
                        host_tree.sources[1][tree_order_incorrect_srcs],
                        "go")
            pt.gca().set_aspect("equal")

            from boxtree.visualization import draw_box_lists
            draw_box_lists(
                    plotter,
                    pre_merge_host_trav if who_has_extent else host_trav,
                    22)
            # from boxtree.visualization import draw_same_level_non_well_sep_boxes
            # draw_same_level_non_well_sep_boxes(plotter, host_trav, 2)

            pt.show()

    # }}}

    # Debug toggle (disabled): plot the per-target deviation.
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    # Debug toggle (disabled): mark targets whose potential is off by >= 1e-3.
    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
                host_tree.targets[0][flags.get() > 0],
                host_tree.targets[1][flags.get() > 0],
                ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
                filt_targets[0][bad],
                filt_targets[1][bad],
                ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
def test_pyfmmlib_fmm(ctx_getter, dims, use_dipoles, helmholtz_k):
    """Check the pyfmmlib-backed FMM against direct evaluation.

    The FMM potential for 3000 sources and 1000 shifted targets is compared
    (relative max-norm error below ``1e-5``) with pyfmmlib's direct
    routine and, when sumpy can be imported, with sumpy's P2P.

    :arg ctx_getter: zero-argument callable returning an OpenCL context.
    :arg dims: spatial dimension; 3 selects the ``fld`` routine family,
        anything else the ``grad`` one.
    :arg use_dipoles: when true, random dipole directions are used.
    :arg helmholtz_k: Helmholtz parameter; a falsy value omits ``zk``
        and selects sumpy's Laplace kernel.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    context = ctx_getter()
    queue = cl.CommandQueue(context)

    source_count = 3000
    target_count = 1000
    real_dtype = np.float64

    src_points = p_normal(queue, source_count, dims, real_dtype, seed=15)
    # Shift the targets along the first axis.
    tgt_points = (
        p_normal(queue, target_count, dims, real_dtype, seed=18)
        + np.array([2, 0, 0])[:dims])

    src_host = particle_array_to_host(src_points)
    tgt_host = particle_array_to_host(tgt_points)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    device_tree, _ = build_tree(
        queue, src_points, targets=tgt_points,
        max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_trav = FMMTraversalBuilder(context)
    device_trav, _ = build_trav(queue, device_tree, debug=True)
    host_trav = device_trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    charge_rng = PhiloxGenerator(queue.context, seed=20)
    charges = charge_rng.uniform(
        queue, source_count, dtype=np.float64).get()

    dipole_vec = None
    if use_dipoles:
        np.random.seed(13)
        dipole_vec = np.random.randn(dims, source_count)

    base_nterms = 20 if (dims == 2 and helmholtz_k == 0) else 10

    def fmm_level_to_nterms(tree, lev):
        # exercise order-varies-by-level capability
        coarse_level_bonus = 5 if (lev < 3 and helmholtz_k) else 0
        dipole_bonus = 1 if use_dipoles else 0
        return base_nterms + coarse_level_bonus + dipole_bonus

    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    wrangler = FMMLibExpansionWrangler(
        host_trav.tree, helmholtz_k,
        fmm_level_to_nterms=fmm_level_to_nterms,
        dipole_vec=dipole_vec)

    from boxtree.fmm import drive_fmm

    timing_data = {}
    pot = drive_fmm(host_trav, wrangler, charges, timing_data=timing_data)
    print(timing_data)
    assert timing_data

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")
    import pyfmmlib

    # Routine name: <eqn letter>pot<fld|grad><dims>dall[_dp]_vec
    routine_name = "%spot%s%ddall%s_vec" % (
        wrangler.eqn_letter,
        "fld" if dims == 3 else "grad",
        dims,
        "_dp" if use_dipoles else "")
    fmmlib_routine = getattr(pyfmmlib, routine_name)

    if dims == 3:
        ref_kwargs = {"iffld": False}
    else:
        ref_kwargs = {"ifgrad": False, "ifhess": False}

    if not use_dipoles:
        ref_kwargs["charge"] = charges
    elif helmholtz_k == 0 and dims == 2:
        # Here the dipole direction is folded into a complex strength.
        ref_kwargs["dipstr"] = (
            -charges * (dipole_vec[0] + 1j * dipole_vec[1]))
    else:
        ref_kwargs["dipstr"] = charges
        ref_kwargs["dipvec"] = dipole_vec

    if helmholtz_k:
        ref_kwargs["zk"] = helmholtz_k

    direct_result = fmmlib_routine(
        sources=src_host.T, targets=tgt_host.T, **ref_kwargs)
    ref_pot = wrangler.finalize_potentials(direct_result[0])

    rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
    logger.info("relative l2 error vs fmmlib direct: %g" % rel_err)
    assert rel_err < 1e-5, rel_err

    # }}}

    # {{{ check against sumpy

    try:
        import sumpy  # noqa
    except ImportError:
        from warnings import warn
        warn("sumpy unavailable: cannot compute independent reference "
                "values for pyfmmlib")
        # Nothing more to check without sumpy.
        return

    from sumpy.kernel import (LaplaceKernel, HelmholtzKernel,
            DirectionalSourceDerivative)
    from sumpy.p2p import P2P

    sumpy_extra_kwargs = {}
    if helmholtz_k:
        knl = HelmholtzKernel(dims)
        sumpy_extra_kwargs["k"] = helmholtz_k
    else:
        knl = LaplaceKernel(dims)

    if use_dipoles:
        knl = DirectionalSourceDerivative(knl)
        sumpy_extra_kwargs["src_derivative_dir"] = dipole_vec

    p2p = P2P(context, [knl], exclude_self=False)
    evt, (sumpy_ref_pot,) = p2p(
        queue, tgt_points, src_points, [charges],
        out_host=True, **sumpy_extra_kwargs)

    sumpy_rel_err = (
        la.norm(pot - sumpy_ref_pot, np.inf)
        / la.norm(sumpy_ref_pot, np.inf))

    logger.info("relative l2 error vs sumpy direct: %g" % sumpy_rel_err)
    assert sumpy_rel_err < 1e-5, sumpy_rel_err

    # }}}
def test_pyfmmlib_numerical_stability(ctx_factory, dims, helmholtz_k, order):
    """Stress the pyfmmlib translations on a deep tree.

    Thirty particles on a line with geometrically varying spacing give a
    tree of at least 15 levels. The FMM result must be NaN-free and within
    ``(1/2)**(1 + order)`` (2D) or ``(3/4)**(1 + order)`` (otherwise) of
    the direct reference in relative max-norm.

    :arg ctx_factory: zero-argument callable returning an OpenCL context.
    :arg dims: number of coordinate rows kept from ``(line, 0, 0)``.
    :arg helmholtz_k: Helmholtz parameter passed to the wrangler.
    :arg order: expansion order used on every level.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    particle_count = 30
    real_dtype = np.float64

    # The input particles are arranged with geometrically increasing/decreasing
    # spacing along a line, to build a deep tree that stress-tests the
    # translations.
    left_half = np.array(
        [2**-i for i in range(particle_count // 2)], dtype=real_dtype)
    line_coords = np.hstack([left_half, 3 - left_half])
    zeros = np.zeros(particle_count, dtype=real_dtype)

    sources = np.vstack([line_coords, zeros, zeros])[:dims]
    # Targets sit slightly off the sources.
    targets = sources * (1 + 1e-3)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    tree, _ = build_tree(
        queue, sources, targets=targets,
        max_particles_in_box=2, debug=True)

    assert tree.nlevels >= 15

    from boxtree.traversal import FMMTraversalBuilder
    build_trav = FMMTraversalBuilder(context)
    device_trav, _ = build_trav(queue, tree, debug=True)
    trav = device_trav.get(queue=queue)

    weights = np.ones_like(sources[0])

    from boxtree.pyfmmlib_integration import (
            FMMLibExpansionWrangler, FMMLibRotationData)

    def fmm_level_to_nterms(tree, lev):
        return order

    rotation_data = FMMLibRotationData(queue, trav)
    wrangler = FMMLibExpansionWrangler(
        trav.tree, helmholtz_k,
        fmm_level_to_nterms=fmm_level_to_nterms,
        rotation_data=rotation_data)

    from boxtree.fmm import drive_fmm

    pot = drive_fmm(trav, wrangler, (weights,))
    assert not np.isnan(pot).any()

    # {{{ ref fmmlib computation

    logger.info("computing direct (reference) result")

    ref_pot = get_fmmlib_ref_pot(
        wrangler, weights, sources, targets, helmholtz_k)

    rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
    logger.info("relative l2 error vs fmmlib direct: %g" % rel_err)

    convergence_ratio = (1 / 2) if dims == 2 else (3 / 4)
    error_bound = convergence_ratio**(1 + order)

    assert rel_err < error_bound, rel_err

    # }}}
def test_fmm_with_optimized_3d_m2l(ctx_factory, nsrcntgts, helmholtz_k,
                                   well_sep_is_n_away):
    """Check that rotation-optimized 3D M2L matches the baseline M2L.

    The same traversal is evaluated with two ``FMMLibExpansionWrangler``
    instances, one of them with ``FMMLibRotationData``; the potentials must
    agree to ``1e-13``. Multipole-to-local times are printed when the
    timing data provides them.

    :arg ctx_factory: zero-argument callable returning an OpenCL context.
    :arg nsrcntgts: total particle count; half become sources, half targets.
    :arg helmholtz_k: Helmholtz parameter passed to both wranglers.
    :arg well_sep_is_n_away: separation criterion for the traversal.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    dims = 3

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    nsources = ntargets = nsrcntgts // 2
    dtype = np.float64

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = (
            p_normal(queue, ntargets, dims, dtype, seed=18)
            + np.array([2, 0, 0])[:dims])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    # Forward the separation criterion; it was previously accepted but
    # ignored, so every parametrized value ran the same default traversal.
    tbuild = FMMTraversalBuilder(ctx, well_sep_is_n_away=well_sep_is_n_away)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()

    base_nterms = 10

    def fmm_level_to_nterms(tree, lev):
        result = base_nterms

        if lev < 3 and helmholtz_k:
            # exercise order-varies-by-level capability
            result += 5

        return result

    from boxtree.pyfmmlib_integration import (
            FMMLibExpansionWrangler, FMMLibRotationData)

    baseline_wrangler = FMMLibExpansionWrangler(
            trav.tree, helmholtz_k,
            fmm_level_to_nterms=fmm_level_to_nterms)

    optimized_wrangler = FMMLibExpansionWrangler(
            trav.tree, helmholtz_k,
            fmm_level_to_nterms=fmm_level_to_nterms,
            rotation_data=FMMLibRotationData(queue, trav))

    from boxtree.fmm import drive_fmm

    baseline_timing_data = {}
    baseline_pot = drive_fmm(
            trav, baseline_wrangler, (weights,),
            timing_data=baseline_timing_data)

    optimized_timing_data = {}
    optimized_pot = drive_fmm(
            trav, optimized_wrangler, (weights,),
            timing_data=optimized_timing_data)

    # "process_elapsed" may be None, in which case nothing is printed.
    baseline_time = baseline_timing_data["multipole_to_local"]["process_elapsed"]
    if baseline_time is not None:
        print("Baseline M2L time : %#.4g s" % baseline_time)

    opt_time = optimized_timing_data["multipole_to_local"]["process_elapsed"]
    if opt_time is not None:
        print("Optimized M2L time: %#.4g s" % opt_time)

    assert np.allclose(baseline_pot, optimized_pot, atol=1e-13, rtol=1e-13)
def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
    """Verify convergence of the sumpy-backed FMM against direct P2P.

    For each expansion order in a kernel-dependent list, the FMM potential
    for 1000 sources and 300 targets is compared with sumpy's ``P2P`` in
    relative max-norm; the errors are fed to a ``PConvergenceVerifier``,
    which is printed and then invoked.

    :arg ctx_getter: zero-argument callable returning an OpenCL context.
    :arg knl: kernel instance; its ``dim`` sets the spatial dimension.
    :arg local_expn_class: local expansion class, applied to *knl*.
    :arg mpole_expn_class: multipole expansion class, applied to *knl*.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 1000
    ntargets = 300
    dtype = np.float64

    from boxtree.tools import (
            make_normal_particle_array as p_normal)

    sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
    # Toggle: random targets offset by 0.1 along the first axis (active),
    # or the points of a sumpy FieldPlotter (inactive).
    if 1:
        offset = np.zeros(knl.dim)
        offset[0] = 0.1

        targets = (
                p_normal(queue, ntargets, knl.dim, dtype, seed=18)
                + offset)

        del offset
    else:
        from sumpy.visualization import FieldPlotter
        fp = FieldPlotter(np.array([0.5, 0]), extent=3, npoints=200)
        from pytools.obj_array import make_obj_array
        targets = make_obj_array(
                [fp.points[i] for i in range(knl.dim)])

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets,
            max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    # {{{ plot tree

    # Debug toggle (disabled): print a few box lookups and draw the tree.
    if 0:
        host_tree = tree.get()
        host_trav = trav.get()

        if 1:
            print("src_box", host_tree.find_box_nr_for_source(403))
            print("tgt_box", host_tree.find_box_nr_for_target(28))
            print(list(host_trav.target_or_target_parent_boxes).index(37))
            print(host_trav.get_box_list("sep_bigger", 22))

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(host_tree)
        plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
        plotter.set_bounding_box()
        plotter.draw_box_numbers()

        import matplotlib.pyplot as pt
        pt.show()

    # }}}

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(ctx, seed=44)
    weights = rng.uniform(queue, nsources, dtype=np.float64)

    logger.info("computing direct (reference) result")

    from pytools.convergence import PConvergenceVerifier

    pconv_verifier = PConvergenceVerifier()

    # Defaults; the Helmholtz and Yukawa branches below switch to a complex
    # dtype, add the kernel parameter and pick other expansion orders.
    extra_kwargs = {}
    dtype = np.float64
    order_values = [1, 2, 3]
    if isinstance(knl, HelmholtzKernel):
        extra_kwargs["k"] = 0.05
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, H2DLocalExpansion):
            order_values = [10, 12]

    elif isinstance(knl, YukawaKernel):
        extra_kwargs["lam"] = 2
        dtype = np.complex128

        if knl.dim == 3:
            order_values = [1, 2]
        elif knl.dim == 2 and issubclass(local_expn_class, Y2DLocalExpansion):
            order_values = [10, 12]

    from functools import partial
    for order in order_values:
        out_kernels = [knl]

        from sumpy.fmm import SumpyExpansionWranglerCodeContainer
        wcc = SumpyExpansionWranglerCodeContainer(
                ctx,
                partial(mpole_expn_class, knl),
                partial(local_expn_class, knl),
                out_kernels)
        # The lambda looks up ``order`` when called; the wrangler is only
        # used inside this iteration, so it sees this iteration's value.
        wrangler = wcc.get_wrangler(queue, tree, dtype,
                fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
                kernel_extra_kwargs=extra_kwargs)

        from boxtree.fmm import drive_fmm

        pot, = drive_fmm(trav, wrangler, weights)

        # Direct evaluation as the reference.
        from sumpy import P2P
        p2p = P2P(ctx, out_kernels, exclude_self=False)
        evt, (ref_pot,) = p2p(queue, targets, sources, (weights,),
                **extra_kwargs)

        pot = pot.get()
        ref_pot = ref_pot.get()

        # NOTE(review): the norm is the max-norm although the log text says l2.
        rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
        logger.info("order %d -> relative l2 error: %g"
                % (order, rel_err))

        pconv_verifier.add_data_point(order, rel_err)

    print(pconv_verifier)
    pconv_verifier()
# Draw `dims` coordinate arrays of `nparticles` normally distributed values
# on the device and bundle them into one object array of particles.
# NOTE(review): RanluxGenerator is deprecated in PyOpenCL -- consider
# PhiloxGenerator(queue.context, seed=15); that changes the sampled points.
from pyopencl.clrandom import RanluxGenerator
rng = RanluxGenerator(queue, seed=15)

from pytools.obj_array import make_obj_array
particles = make_obj_array([
    rng.normal(queue, nparticles, dtype=np.float64)
    for i in range(dims)])

# -----------------------------------------------------------------------------
# build tree and traversals (lists)
# -----------------------------------------------------------------------------

# At most 30 particles per box.
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
tree, _ = tb(queue, particles, max_particles_in_box=30)

# Build the FMM interaction lists ("traversal") for that tree.
from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(ctx)
trav, _ = tg(queue, tree)

# ENDEXAMPLE

# -----------------------------------------------------------------------------
# plot the tree
# -----------------------------------------------------------------------------

import matplotlib.pyplot as pt

# Particles are fetched to the host (first two coordinates only).
pt.plot(particles[0].get(), particles[1].get(), "x")

# The plotter is given a host-side copy of the tree.
from boxtree.visualization import TreePlotter
plotter = TreePlotter(tree.get(queue=queue))
plotter.draw_tree(fill=False, edgecolor="black")
def test_estimate_calibration_params(ctx_factory):
    """Check calibration-parameter estimation for both cost models.

    Four 3D problems are run through the pyfmmlib FMM with timing
    collection. The first three cases calibrate the Python and the OpenCL
    cost model; all eight parameters must be ``np.float64``, and the two
    models must agree exactly when process time is supported.

    :arg ctx_factory: zero-argument callable returning an OpenCL context.
    """
    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    from boxtree.tools import make_normal_particle_array as p_normal
    from pyopencl.clrandom import PhiloxGenerator
    from boxtree import TreeBuilder
    from boxtree.traversal import FMMTraversalBuilder
    from boxtree.fmm import drive_fmm

    problem_sizes = zip(
        [1000, 2000, 3000, 4000],   # sources
        [1000, 2000, 3000, 4000])   # targets
    dims = 3
    dtype = np.float64

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    host_traversals = []
    device_traversals = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        return 10

    for nsources, ntargets in problem_sizes:
        # {{{ Generate sources, targets and target_radii

        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        radius_rng = PhiloxGenerator(queue.context, seed=22)
        target_radii = radius_rng.uniform(
            queue, ntargets, a=0, b=0.05, dtype=dtype).get()

        # }}}

        # {{{ Generate tree and traversal

        build_tree = TreeBuilder(ctx)
        tree, _ = build_tree(
            queue, sources, targets=targets, target_radii=target_radii,
            stick_out_factor=0.15, max_particles_in_box=30, debug=True)

        build_trav = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
        trav_dev, _ = build_trav(queue, tree, debug=True)
        trav = trav_dev.get(queue=queue)

        host_traversals.append(trav)
        device_traversals.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        case_timing = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, (src_weights,), timing_data=case_timing)

        timing_results.append(case_timing)

    time_field_name = (
        "process_elapsed" if SUPPORTS_PROCESS_TIME else "wall_elapsed")

    calibration_param_names = (
        "c_p2m", "c_m2m", "c_p2p", "c_m2l",
        "c_m2p", "c_p2l", "c_l2l", "c_l2p")

    def assert_params_sane(test_params):
        for name in calibration_param_names:
            assert isinstance(test_params[name], np.float64)

    def assert_params_equal(test_params1, test_params2):
        for name in calibration_param_names:
            assert test_params1[name] == test_params2[name]

    # The last case is left out of the calibration for both models.
    calibration_orders = level_to_orders[:-1]
    calibration_timings = timing_results[:-1]

    python_cost_model = _PythonFMMCostModel(make_pde_aware_translation_cost_model)

    python_model_results = [
        python_cost_model.cost_per_stage(
            queue, traversal, level_to_order,
            _PythonFMMCostModel.get_unit_calibration_params(),
        )
        for traversal, level_to_order
        in zip(host_traversals[:-1], calibration_orders)]

    python_params = python_cost_model.estimate_calibration_params(
        python_model_results, calibration_timings,
        time_field_name=time_field_name)

    assert_params_sane(python_params)

    cl_cost_model = FMMCostModel(make_pde_aware_translation_cost_model)

    cl_model_results = [
        cl_cost_model.cost_per_stage(
            queue, traversal, level_to_order,
            FMMCostModel.get_unit_calibration_params(),
        )
        for traversal, level_to_order
        in zip(device_traversals[:-1], calibration_orders)]

    cl_params = cl_cost_model.estimate_calibration_params(
        cl_model_results, calibration_timings,
        time_field_name=time_field_name)

    assert_params_sane(cl_params)

    if SUPPORTS_PROCESS_TIME:
        assert_params_equal(cl_params, python_params)
def test_cost_model_op_counts_agree_with_constantone_wrangler(
        ctx_factory, nsources, ntargets, dims, dtype):
    """Check the op-counting cost model against the constant-one wrangler.

    For each stage in the FMM timing data, ``ops_elapsed`` must equal the
    modeled per-stage cost. The summed ``ops_elapsed`` must also equal the
    aggregated per-box cost plus the modeled ``coarsen_multipoles`` and
    ``refine_locals`` costs.

    :arg ctx_factory: zero-argument callable returning an OpenCL context.
    :arg nsources: number of sources.
    :arg ntargets: number of targets.
    :arg dims: spatial dimension.
    :arg dtype: dtype of the particle coordinates and target radii.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from boxtree.tools import make_normal_particle_array as p_normal
    source_points = p_normal(queue, nsources, dims, dtype, seed=16)
    target_points = p_normal(queue, ntargets, dims, dtype, seed=19)

    from pyopencl.clrandom import PhiloxGenerator
    radius_rng = PhiloxGenerator(queue.context, seed=20)
    target_radii = radius_rng.uniform(
        queue, ntargets, a=0, b=0.04, dtype=dtype).get()

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(context)
    tree, _ = build_tree(
        queue, source_points, targets=target_points,
        target_radii=target_radii,
        stick_out_factor=0.15, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    build_trav = FMMTraversalBuilder(context, well_sep_is_n_away=2)
    trav_dev, _ = build_trav(queue, tree, debug=True)
    trav = trav_dev.get(queue=queue)

    from boxtree.tools import ConstantOneExpansionWrangler
    wrangler = ConstantOneExpansionWrangler(trav.tree)

    timing_data = {}
    from boxtree.fmm import drive_fmm
    src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
    drive_fmm(trav, wrangler, (src_weights,), timing_data=timing_data)

    cost_model = FMMCostModel(
        translation_cost_model_factory=OpCountingTranslationCostModel)

    # Order 1 on every level.
    level_to_order = np.array([1 for _ in range(tree.nlevels)])

    modeled_time = cost_model.cost_per_stage(
        queue, trav_dev, level_to_order,
        FMMCostModel.get_unit_calibration_params(),
    )

    # Collect every stage whose counted ops differ from the model.
    mismatches = [
        (stage, stage_timing["ops_elapsed"], modeled_time[stage])
        for stage, stage_timing in timing_data.items()
        if stage_timing["ops_elapsed"] != modeled_time[stage]]

    assert not mismatches, "\n".join(str(s) for s in mismatches)

    # {{{ Test per-box cost

    total_cost = 0.0
    for stage_timing in timing_data.values():
        total_cost += stage_timing["ops_elapsed"]

    per_box_cost = cost_model.cost_per_box(
        queue, trav_dev, level_to_order,
        FMMCostModel.get_unit_calibration_params(),
    )
    total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost)

    # The two translation stages are added to the per-box aggregate.
    expected_total = (
        total_aggregate_cost
        + modeled_time["coarsen_multipoles"]
        + modeled_time["refine_locals"])
    assert total_cost == expected_total

    # }}}
def test_tree_connectivity(ctx_getter, dims, sources_are_targets):
    """Check structural invariants of a tree and its FMM traversal lists.

    A tree is built from normally distributed particles (with a separate
    target set unless *sources_are_targets* is true), a traversal is built
    on top of it, and both are transferred to the host. The test then
    asserts, in order:

    * parent/child links and box levels are mutually consistent,
    * list 1 (``neighbor_source_boxes``) only contains childless boxes,
    * list 2 (``sep_siblings``) entries sit on the same level as the target
      box and are farther away than a minimum distance,
    * if sources are targets, lists 3 and 4 are duals of each other,
    * list 3 entries are on a deeper level and list 4 entries on a
      shallower level than the box they belong to,
    * each ``level_start_*_box_nrs`` array delimits boxes of a single level.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg dims: passed to ``make_normal_particle_array`` as the
        dimensionality of the particle arrays.
    :arg sources_are_targets: if true, no separate targets are passed to the
        tree builder and the duality checks are enabled.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    sources = make_normal_particle_array(queue, 1 * 10**5, dims, dtype)
    if sources_are_targets:
        targets = None
    else:
        targets = make_normal_particle_array(queue, 2 * 10**5, dims, dtype)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)
    tree, _ = tb(queue, sources, max_particles_in_box=30,
            targets=targets, debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tg = FMMTraversalBuilder(ctx)
    trav, _ = tg(queue, tree, debug=True)

    # All checks below run on host copies.
    tree = tree.get(queue=queue)
    trav = trav.get(queue=queue)

    # Transposed so that indexing with a box number yields that box's entry.
    levels = tree.box_levels
    parents = tree.box_parent_ids.T
    children = tree.box_child_ids.T
    centers = tree.box_centers.T

    # {{{ parent and child relations, levels match up

    # Box 0 is skipped: it has no parent.
    for ibox in range(1, tree.nboxes):
        parent = parents[ibox]

        assert levels[parent] + 1 == levels[ibox]
        assert ibox in children[parent], ibox

    # }}}

    # Manual debugging switch: plot the tree with box numbers.
    if 0:
        import matplotlib.pyplot as pt
        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        plotter.draw_box_numbers()
        plotter.set_bounding_box()
        pt.show()

    # {{{ neighbor_source_boxes (list 1) consists of source boxes

    for itgt_box, ibox in enumerate(trav.target_boxes):
        start, end = trav.neighbor_source_boxes_starts[itgt_box:itgt_box + 2]
        nbl = trav.neighbor_source_boxes_lists[start:end]

        if sources_are_targets:
            # A box must be in its own near-neighbor list.
            assert ibox in nbl

        for jbox in nbl:
            # All child ids being 0 means jbox has no children.
            assert (0 == children[jbox]).all(), (ibox, jbox, children[jbox])

    logger.info("list 1 consists of source boxes")

    # }}}

    # {{{ separated siblings (list 2) are actually separated

    for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes):
        start, end = trav.sep_siblings_starts[itgt_box:itgt_box + 2]
        seps = trav.sep_siblings_lists[start:end]

        assert (levels[seps] == levels[tgt_ibox]).all()

        # three-ish box radii (half of size): 2.5 times half the extent of a
        # box on this level
        mindist = 2.5 * 0.5 * 2**-int(levels[tgt_ibox]) * tree.root_extent

        icenter = centers[tgt_ibox]
        for jbox in seps:
            dist = la.norm(centers[jbox] - icenter)
            assert dist > mindist, (dist, mindist)

    logger.info("separated siblings (list 2) are actually separated")

    # }}}

    if sources_are_targets:
        # {{{ sep_{smaller,bigger} are duals of each other

        assert (trav.target_or_target_parent_boxes == np.arange(
            tree.nboxes)).all()

        # {{{ list 3 <= list 4: each list 3 entry jbox of ibox must have ibox
        # in its own list 4

        for itarget_box, ibox in enumerate(trav.target_boxes):
            for ssn in trav.sep_smaller_by_level:
                start, end = ssn.starts[itarget_box:itarget_box + 2]

                for jbox in ssn.lists[start:end]:
                    rstart, rend = trav.sep_bigger_starts[jbox:jbox + 2]

                    assert ibox in trav.sep_bigger_lists[rstart:rend], (ibox, jbox)

        # }}}

        # {{{ list 4 <= list 3

        # Map from global box number to index into target_boxes; -1 marks
        # boxes that are not target boxes.
        box_to_target_box_index = np.empty(tree.nboxes, tree.box_id_dtype)
        box_to_target_box_index.fill(-1)
        box_to_target_box_index[trav.target_boxes] = np.arange(
                len(trav.target_boxes), dtype=tree.box_id_dtype)

        assert (trav.source_boxes == trav.target_boxes).all()
        assert (trav.target_or_target_parent_boxes == np.arange(
            tree.nboxes, dtype=tree.box_id_dtype)).all()

        for ibox in range(tree.nboxes):
            start, end = trav.sep_bigger_starts[ibox:ibox + 2]

            for jbox in trav.sep_bigger_lists[start:end]:
                # In principle, entries of sep_bigger_lists are
                # source boxes. In this special case, source and target boxes
                # are the same thing (i.e. leaves--see assertion above), so we
                # may treat them as targets anyhow.

                jtgt_box = box_to_target_box_index[jbox]
                assert jtgt_box != -1

                good = False

                # ibox may show up in any one of the per-level list 3 parts.
                for ssn in trav.sep_smaller_by_level:
                    rstart, rend = ssn.starts[jtgt_box:jtgt_box + 2]
                    good = good or ibox in ssn.lists[rstart:rend]

                if not good:
                    # Show the offending pair before the assertion fires.
                    from boxtree.visualization import TreePlotter
                    plotter = TreePlotter(tree)
                    plotter.draw_tree(fill=False, edgecolor="black", zorder=10)
                    plotter.set_bounding_box()

                    plotter.draw_box(ibox, facecolor='green', alpha=0.5)
                    plotter.draw_box(jbox, facecolor='red', alpha=0.5)

                    import matplotlib.pyplot as pt
                    pt.gca().set_aspect("equal")
                    pt.show()

                # This assertion failing means that ibox's list 4 contains a box
                # 'jbox' whose list 3 does not contain ibox.
                assert good, (ibox, jbox)

        # }}}

        logger.info("list 3, 4 are duals")

        # }}}

    # {{{ sep_smaller satisfies relative level assumption

    for itarget_box, ibox in enumerate(trav.target_boxes):
        for ssn in trav.sep_smaller_by_level:
            start, end = ssn.starts[itarget_box:itarget_box + 2]

            for jbox in ssn.lists[start:end]:
                # list 3 entries live on a strictly deeper level
                assert levels[ibox] < levels[jbox]

    logger.info("list 3 satisfies relative level assumption")

    # }}}

    # {{{ sep_bigger satisfies relative level assumption

    for itgt_box, tgt_ibox in enumerate(trav.target_or_target_parent_boxes):
        start, end = trav.sep_bigger_starts[itgt_box:itgt_box + 2]

        for jbox in trav.sep_bigger_lists[start:end]:
            # list 4 entries live on a strictly shallower level
            assert levels[tgt_ibox] > levels[jbox]

    logger.info("list 4 satisfies relative level assumption")

    # }}}

    # {{{ level_start_*_box_nrs lists make sense

    # Each level_start array is paired with the box list it indexes into.
    for name, ref_array in [
            ("level_start_source_box_nrs", trav.source_boxes),
            ("level_start_source_parent_box_nrs", trav.source_parent_boxes),
            ("level_start_target_box_nrs", trav.target_boxes),
            ("level_start_target_or_target_parent_box_nrs",
                trav.target_or_target_parent_boxes)]:
        level_starts = getattr(trav, name)
        for lev in range(tree.nlevels):
            start, stop = level_starts[lev:lev + 2]

            box_nrs = ref_array[start:stop]

            assert (tree.box_levels[box_nrs] == lev).all(), name
def main():
    """Run the 3D Laplace volume-potential demo and print the error.

    The source density is ``-laplace(u)`` for ``u = exp(-alpha * |x|^2)`` on
    the box ``[a, b]^3``. The script

    1. generates quadrature points and weights with ``volumential.meshgen``,
    2. builds a level-restricted adaptive tree and an FMM traversal,
    3. obtains a near-field interaction table (downloading a precomputed
       one when ``download_table`` is set and no local file exists),
    4. sets up an ``FPNDExpansionWrangler`` and runs ``drive_volume_fmm``,
    5. interpolates the potential at the origin and prints the absolute
       difference to ``u`` evaluated there.

    All parameters are local constants; the ``if 0:`` / ``if 1:`` sections
    are manual switches for optional plotting, a P2P comparison and VTK
    output.
    """
    import json
    import os
    import subprocess
    import time

    print("*************************")
    print("* Setting up...")
    print("*************************")

    dim = 3
    # download precomputation results for the 3D Laplace kernel
    download_table = True
    table_filename = "nft_laplace3d.hdf5"

    logger.info("Using table cache: " + table_filename)

    q_order = 7  # quadrature order
    n_levels = 5
    use_multilevel_table = False

    adaptive_mesh = False
    n_refinement_loops = 100
    refined_n_cells = 5e5
    rratio_top = 0.2
    rratio_bot = 0.5

    dtype = np.float64

    m_order = 10  # multipole order
    force_direct_evaluation = False

    logger.info("Multipole order = " + str(m_order))
    logger.info("Quad order = " + str(q_order))
    logger.info("N_levels = " + str(n_levels))

    # A solution that is small at the boundary: at the boundary points
    # closest to the origin, |x|^2 = 0.25 and u = exp(-alpha / 4) = exp(-20),
    # roughly 2e-9.
    alpha = 80

    x = pmbl.var("x")
    y = pmbl.var("y")
    z = pmbl.var("z")
    expp = pmbl.var("exp")

    norm2 = x**2 + y**2 + z**2
    source_expr = -(4 * alpha**2 * norm2 - 6 * alpha) * expp(-alpha * norm2)
    solu_expr = expp(-alpha * norm2)

    logger.info("Source expr: " + str(source_expr))
    logger.info("Solu expr: " + str(solu_expr))

    # bounding box
    a = -0.5
    b = 0.5
    root_table_source_extent = 2

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # logger.info("Summary of params: " + get_param_summary())
    source_eval = Eval(dim, source_expr, [x, y, z])

    # {{{ generate quad points

    import volumential.meshgen as mg

    # Show meshgen info
    mg.greet()

    mesh = mg.MeshGen3D(q_order, n_levels, a, b, queue=queue)
    if adaptive_mesh:
        # Refine until enough cells are active or the loop budget runs out.
        iloop = -1
        while mesh.n_active_cells() < refined_n_cells:
            iloop += 1
            cell_centers = mesh.get_cell_centers()
            cell_measures = mesh.get_cell_measures()
            density_vals = source_eval(
                queue,
                np.array([[center[d] for center in cell_centers]
                          for d in range(dim)]))
            crtr = np.abs(cell_measures * density_vals)
            mesh.update_mesh(crtr, rratio_top, rratio_bot)
            if iloop > n_refinement_loops:
                print("Max number of refinement loops reached.")
                break

    mesh.print_info()
    q_points = mesh.get_q_points()
    q_weights = mesh.get_q_weights()

    if 1:
        # Best effort: the mesh file is only needed for the VTK output below.
        try:
            mesh.generate_gmsh("box_grid.msh")
        except Exception as e:
            print(e)

        legacy_msh_file = True
        if legacy_msh_file:
            os.system("gmsh box_grid.msh convert_grid -")

    assert len(q_points) == len(q_weights)
    assert q_points.shape[1] == dim

    # One contiguous coordinate array per axis, each moved to the device.
    q_points = np.ascontiguousarray(np.transpose(q_points))

    from pytools.obj_array import make_obj_array
    q_points = make_obj_array(
        [cl.array.to_device(queue, q_points[i]) for i in range(dim)])

    q_weights = cl.array.to_device(queue, q_weights)

    # }}}

    # {{{ discretize the source field

    logger.info("discretizing source field")
    source_vals = cl.array.to_device(
        queue,
        source_eval(queue, np.array([coords.get() for coords in q_points])))

    # particle_weigt = source_val * q_weight

    # }}} End discretize the source field

    # {{{ build tree and traversals

    from boxtree.tools import AXIS_NAMES
    axis_names = AXIS_NAMES[:dim]

    from pytools import single_valued
    coord_dtype = single_valued(coord.dtype for coord in q_points)

    from boxtree.bounding_box import make_bounding_box_dtype
    bbox_type, _ = make_bounding_box_dtype(ctx.devices[0], dim, coord_dtype)
    bbox = np.empty(1, bbox_type)
    for ax in axis_names:
        bbox["min_" + ax] = a
        bbox["max_" + ax] = b

    # tune max_particles_in_box to reconstruct the mesh
    # TODO: use points from FieldPlotter as target points for better
    # visuals
    print("building tree")
    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)
    tree, _ = tb(
        queue,
        particles=q_points,
        targets=q_points,
        bbox=bbox,
        max_particles_in_box=q_order**3 * 8 - 1,
        kind="adaptive-level-restricted",
    )

    from boxtree.traversal import FMMTraversalBuilder
    tg = FMMTraversalBuilder(ctx)
    trav, _ = tg(queue, tree)

    # }}} End build tree and traversals

    # {{{ build near field potential table

    from volumential.table_manager import NearFieldInteractionTableManager

    if download_table and not os.path.isfile(table_filename):
        with open("table_urls.json", 'r') as fp:
            urls = json.load(fp)

        print("Downloading table from %s" % urls['Laplace3D'])
        # "-O" names the output file. Without it wget treats every
        # positional argument as a URL, so the table would not be stored
        # under table_filename.
        status = subprocess.call(
            ["wget", "-q", "-O", table_filename, urls['Laplace3D']])
        if status != 0:
            logger.warning("Downloading the table failed (wget exit code %d)",
                           status)
            # Do not leave an empty or partial file behind that a later run
            # would mistake for a valid table cache.
            try:
                os.remove(table_filename)
            except OSError:
                pass

    tm = NearFieldInteractionTableManager(
        table_filename,
        root_extent=root_table_source_extent,
        queue=queue)

    if use_multilevel_table:
        logger.info("Using multilevel tables")
        # The box extent has to be an integer multiple of the table extent.
        assert (abs(
            int((b - a) / root_table_source_extent)
            * root_table_source_extent - (b - a)) < 1e-15)
        nftable = []
        for lev in range(0, tree.nlevels + 1):
            print("Getting table at level", lev)
            table, _ = tm.get_table(
                dim,
                "Laplace",
                q_order,
                source_box_level=lev,
                compute_method="DrosteSum",
                queue=queue,
                n_brick_quad_points=120,
                adaptive_level=False,
                use_symmetry=True,
                alpha=0,
                n_levels=1,
            )
            nftable.append(table)

        print("Using table list of length", len(nftable))

    else:
        logger.info("Using single level table")
        force_recompute = False
        # 15 levels are sufficient (the inner most brick is 1e-15**3 in volume)
        nftable, _ = tm.get_table(
            dim,
            "Laplace",
            q_order,
            force_recompute=force_recompute,
            compute_method="DrosteSum",
            queue=queue,
            n_brick_quad_points=120,
            adaptive_level=False,
            use_symmetry=True,
            alpha=0,
            n_levels=1,
        )

    # }}} End build near field potential table

    # {{{ sumpy expansion for laplace kernel

    from sumpy.expansion import DefaultExpansionFactory
    from sumpy.kernel import LaplaceKernel

    knl = LaplaceKernel(dim)
    out_kernels = [knl]

    expn_factory = DefaultExpansionFactory()
    local_expn_class = expn_factory.get_local_expansion_class(knl)
    mpole_expn_class = expn_factory.get_multipole_expansion_class(knl)

    exclude_self = True

    from volumential.expansion_wrangler_fpnd import (
        FPNDExpansionWrangler, FPNDExpansionWranglerCodeContainer)

    wcc = FPNDExpansionWranglerCodeContainer(
        ctx,
        partial(mpole_expn_class, knl),
        partial(local_expn_class, knl),
        out_kernels,
        exclude_self=exclude_self,
    )

    if exclude_self:
        # Sources and targets coincide, so target i maps to source i.
        target_to_source = np.arange(tree.ntargets, dtype=np.int32)
        self_extra_kwargs = {"target_to_source": target_to_source}
    else:
        self_extra_kwargs = {}

    wrangler = FPNDExpansionWrangler(
        code_container=wcc,
        queue=queue,
        tree=tree,
        near_field_table=nftable,
        dtype=dtype,
        fmm_level_to_order=lambda kernel, kernel_args, tree, lev: m_order,
        quad_order=q_order,
        self_extra_kwargs=self_extra_kwargs,
    )

    # }}} End sumpy expansion for laplace kernel

    print("*************************")
    print("* Performing FMM ...")
    print("*************************")

    # {{{ conduct fmm computation

    from volumential.volume_fmm import drive_volume_fmm

    queue.finish()

    t0 = time.time()

    pot, = drive_volume_fmm(
        trav,
        wrangler,
        source_vals * q_weights,
        source_vals,
        direct_evaluation=force_direct_evaluation,
        list1_only=False)

    t1 = time.time()

    print("Finished in %.2f seconds." % (t1 - t0))
    print("(%e points per second)" % (len(q_weights) / (t1 - t0)))

    # }}} End conduct fmm computation

    print("*************************")
    print("* Postprocessing ...")
    print("*************************")

    # {{{ postprocess and plot

    # print(pot)

    solu_eval = Eval(dim, solu_expr, [x, y, z])

    # x = q_points[0].get()
    # y = q_points[1].get()
    # z = q_points[2].get()

    # The error is measured at a single test point, the origin.
    test_x = np.array([0.0])
    test_y = np.array([0.0])
    test_z = np.array([0.0])
    test_nodes = make_obj_array(
        # get() first for CL compatibility issues
        [
            cl.array.to_device(queue, test_x),
            cl.array.to_device(queue, test_y),
            cl.array.to_device(queue, test_z),
        ])

    from volumential.volume_fmm import interpolate_volume_potential

    ze = solu_eval(queue, np.array([test_x, test_y, test_z]))
    zs = interpolate_volume_potential(test_nodes, trav, wrangler, pot).get()

    print_error = True
    if print_error:
        err = np.max(np.abs(ze - zs))
        print("Error =", err)

    # Boxtree
    if 0:
        import matplotlib.pyplot as plt

        if dim == 2:
            plt.plot(q_points[0].get(), q_points[1].get(), ".")

        from boxtree.visualization import TreePlotter

        plotter = TreePlotter(tree.get(queue=queue))
        plotter.draw_tree(fill=False, edgecolor="black")
        # plotter.draw_box_numbers()
        plotter.set_bounding_box()
        plt.gca().set_aspect("equal")

        plt.draw()
        plt.show()
        # plt.savefig("tree.png")

    # Direct p2p
    if 0:
        print("Performing P2P")
        pot_direct, = drive_volume_fmm(
            trav,
            wrangler,
            source_vals * q_weights,
            source_vals,
            direct_evaluation=True)
        zds = pot_direct.get()
        zs = pot.get()

        print("P2P-FMM diff =", np.max(np.abs(zs - zds)))
        print("P2P Error =", np.max(np.abs(ze - zds)))

    # Write vtk
    if 0:
        from meshmode.mesh.io import read_gmsh

        modemesh = read_gmsh("box_grid.msh", force_ambient_dim=None)
        from meshmode.discretization.poly_element import (
            LegendreGaussLobattoTensorProductGroupFactory,
        )
        from meshmode.array_context import PyOpenCLArrayContext
        from meshmode.discretization import Discretization

        actx = PyOpenCLArrayContext(queue)
        box_discr = Discretization(
            actx, modemesh,
            LegendreGaussLobattoTensorProductGroupFactory(q_order))

        box_nodes_x = box_discr.nodes()[0].with_queue(queue).get()
        box_nodes_y = box_discr.nodes()[1].with_queue(queue).get()
        box_nodes_z = box_discr.nodes()[2].with_queue(queue).get()
        box_nodes = make_obj_array(
            # get() first for CL compatibility issues
            [
                cl.array.to_device(queue, box_nodes_x),
                cl.array.to_device(queue, box_nodes_y),
                cl.array.to_device(queue, box_nodes_z),
            ])

        visual_order = 1
        from meshmode.discretization.visualization import make_visualizer

        vis = make_visualizer(queue, box_discr, visual_order)

        from volumential.volume_fmm import interpolate_volume_potential

        volume_potential = interpolate_volume_potential(
            box_nodes, trav, wrangler, pot)

        # qx = q_points[0].get()
        # qy = q_points[1].get()
        # qz = q_points[2].get()
        exact_solution = cl.array.to_device(
            queue,
            solu_eval(queue,
                      np.array([box_nodes_x, box_nodes_y, box_nodes_z])))

        # clean up the mess
        def clean_file(filename):
            """Remove *filename*, ignoring the case where that fails."""
            try:
                os.remove(filename)
            except OSError:
                pass

        vtu_filename = "laplace3d.vtu"
        clean_file(vtu_filename)
        vis.write_vtk_file(
            vtu_filename,
            [
                ("VolPot", volume_potential),
                # ("SrcDensity", source_density),
                ("ExactSol", exact_solution),
                ("Error", volume_potential - exact_solution),
            ],
        )
        print("Written file " + vtu_filename)

    # }}} End postprocess and plot
def plot_traversal(ctx_getter, do_plot=False):
    """Draw a 2D tree and highlight a few randomly chosen interaction lists.

    For a handful of randomly picked boxes (drawn in red) the members of one
    traversal list (drawn in yellow) are plotted on top of the tree, and the
    figure is shown with matplotlib.

    :arg ctx_getter: callable returning the OpenCL context to use.
    :arg do_plot: accepted, but not consulted by the function body; the
        figure is shown regardless of its value.
    """
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    # dimensions to plot; (2, 3) would also cover the 3D case
    for dims in (2,):
        nparticles = 10**4
        dtype = np.float64

        from pyopencl.clrandom import PhiloxGenerator
        from pytools.obj_array import make_obj_array

        rng = PhiloxGenerator(queue.context, seed=15)
        coordinate_arrays = [
            rng.normal(queue, nparticles, dtype=dtype)
            for _axis in range(dims)]
        particles = make_obj_array(coordinate_arrays)

        # if do_plot:
        #     pt.plot(particles[0].get(), particles[1].get(), "x")

        from boxtree import TreeBuilder
        build_tree = TreeBuilder(context)

        queue.finish()
        # NOTE(review): the builder results are used directly here rather
        # than being unpacked as a (result, event) pair -- confirm this
        # matches the boxtree version in use.
        tree = build_tree(queue, particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        build_trav = FMMTraversalBuilder(context)
        trav = build_trav(queue, tree).get()

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        #plotter.draw_box_numbers()
        plotter.set_bounding_box()

        # Fixed seed so that the same boxes are picked on every run.
        from random import randrange, seed
        seed(7)

        # {{{ generic box drawing helper

        def draw_some_box_lists(starts, lists, key_to_box=None, count=5):
            """Highlight *count* randomly chosen boxes with non-empty lists.

            *starts*/*lists* form a CSR-like pair indexed by a key. If
            *key_to_box* is given it translates the key to a box number,
            otherwise the key is the box number itself.
            """
            drawn = 0
            while drawn < count:
                if key_to_box is None:
                    key = randrange(tree.nboxes)
                    ibox = key
                else:
                    key = randrange(len(key_to_box))
                    ibox = key_to_box[key]

                first, last = starts[key:key + 2]
                if first != last:
                    # Only boxes with a non-empty list count towards `count`.
                    for jbox in lists[first:last]:
                        plotter.draw_box(jbox, facecolor='yellow')

                    plotter.draw_box(ibox, facecolor='red')
                    drawn += 1

        # }}}

        # Manual switch selecting which traversal list gets drawn.
        shown_list = "sep_smaller"

        if shown_list == "colleagues":
            draw_some_box_lists(trav.colleagues_starts, trav.colleagues_lists)
        elif shown_list == "neighbor_leaves":
            # near neighbors ("list 1")
            draw_some_box_lists(
                trav.neighbor_leaves_starts,
                trav.neighbor_leaves_lists,
                key_to_box=trav.source_boxes)
        elif shown_list == "sep_siblings":
            # well-separated siblings (list 2)
            draw_some_box_lists(
                trav.sep_siblings_starts, trav.sep_siblings_lists)
        elif shown_list == "sep_smaller":
            # separated smaller (list 3)
            draw_some_box_lists(
                trav.sep_smaller_starts,
                trav.sep_smaller_lists,
                key_to_box=trav.source_boxes)
        elif shown_list == "sep_bigger":
            # separated bigger (list 4)
            draw_some_box_lists(trav.sep_bigger_starts, trav.sep_bigger_lists)

        import matplotlib.pyplot as pt
        pt.show()
def laplace_problem(ctx_factory):
    """Set up a 2D Laplace volume-potential problem for the volume FMM.

    The source density is ``-laplace(u)`` for the Gaussian
    ``u(x, y) = exp(-alpha * (x^2 + y^2))`` on the box ``[a, b]^2``. The
    mesh is refined adaptively, a level-restricted adaptive tree and
    traversal are built over the quadrature points, a freshly created
    near-field table is obtained, and an ``FPNDExpansionWrangler`` is set
    up.

    :arg ctx_factory: callable returning the OpenCL context to use.
    :returns: a tuple ``(trav, wrangler, source_vals, q_weights)`` where
        *source_vals* and *q_weights* are device arrays holding the source
        density and the quadrature weights at the quadrature points.
    """
    import os

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    dim = 2
    dtype = np.float64

    q_order = 2  # quadrature order
    n_levels = 3  # 2^(n_levels-1) subintervals in 1D

    # adaptive_mesh = True
    n_refinement_loops = 100
    refined_n_cells = 1000
    rratio_top = 0.2
    rratio_bot = 0.5

    # bounding box
    a = -1.
    b = 1.

    m_order = 15  # multipole order

    alpha = 160 / np.sqrt(2)

    def source_field(x):
        """Return -laplace(u) at the 2D point *x* for u = exp(-alpha |x|^2)."""
        assert len(x) == dim
        assert dim == 2
        norm2 = x[0] ** 2 + x[1] ** 2
        lap_u = (4 * alpha ** 2 * norm2 - 4 * alpha) * np.exp(-alpha * norm2)
        return -lap_u

    # {{{ generate quad points

    mesh = mg.MeshGen2D(q_order, n_levels, a, b)
    iloop = 0
    # Refine where |source * cell measure| is large until enough cells are
    # active or the loop budget is exhausted.
    while mesh.n_active_cells() < refined_n_cells:
        iloop += 1
        crtr = np.array(
            [
                np.abs(source_field(c) * m)
                for (c, m) in zip(mesh.get_cell_centers(),
                                  mesh.get_cell_measures())
            ]
        )
        mesh.update_mesh(crtr, rratio_top, rratio_bot)
        if iloop > n_refinement_loops:
            print("Max number of refinement loops reached.")
            break

    q_points = mesh.get_q_points()
    q_weights = mesh.get_q_weights()
    # q_radii = None

    assert len(q_points) == len(q_weights)
    assert q_points.shape[1] == dim

    # Keep the point-major host array for evaluating the source below.
    q_points_org = q_points
    q_points = np.ascontiguousarray(np.transpose(q_points))

    from pytools.obj_array import make_obj_array

    q_points = make_obj_array(
        [cl.array.to_device(queue, q_points[i]) for i in range(dim)]
    )

    q_weights = cl.array.to_device(queue, q_weights)
    # q_radii = cl.array.to_device(queue, q_radii)

    # }}}

    # {{{ discretize the source field

    source_vals = cl.array.to_device(
        queue, np.array([source_field(qp) for qp in q_points_org])
    )

    # particle_weigt = source_val * q_weight

    # }}} End discretize the source field

    # {{{ build tree and traversals

    from boxtree.tools import AXIS_NAMES

    axis_names = AXIS_NAMES[:dim]

    from pytools import single_valued

    coord_dtype = single_valued(coord.dtype for coord in q_points)
    from boxtree.bounding_box import make_bounding_box_dtype

    bbox_type, _ = make_bounding_box_dtype(ctx.devices[0], dim, coord_dtype)
    bbox = np.empty(1, bbox_type)
    for ax in axis_names:
        bbox["min_" + ax] = a
        bbox["max_" + ax] = b

    # tune max_particles_in_box to reconstruct the mesh
    from boxtree import TreeBuilder

    tb = TreeBuilder(ctx)
    tree, _ = tb(
        queue,
        particles=q_points,
        targets=q_points,
        bbox=bbox,
        max_particles_in_box=q_order ** 2 * 4 - 1,
        kind="adaptive-level-restricted",
    )

    from boxtree.traversal import FMMTraversalBuilder

    tg = FMMTraversalBuilder(ctx)
    trav, _ = tg(queue, tree)

    # }}} End build tree and traversals

    # {{{ build near field potential table

    from volumential.table_manager import NearFieldInteractionTableManager

    # Start from a clean table file. Only a missing file is ignored; any
    # other failure to delete it still raises.
    table_filename = "nft-test-volume-fmm.hdf5"
    try:
        os.remove(table_filename)
    except FileNotFoundError:
        pass

    tm = NearFieldInteractionTableManager(table_filename)
    nftable, _ = tm.get_table(dim, "Laplace", q_order)

    # }}} End build near field potential table

    # {{{ sumpy expansion for laplace kernel

    from sumpy.kernel import LaplaceKernel

    # from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansion
    # from sumpy.expansion.local import VolumeTaylorLocalExpansion
    from sumpy.expansion.multipole import (
        LaplaceConformingVolumeTaylorMultipoleExpansion,
    )
    from sumpy.expansion.local import LaplaceConformingVolumeTaylorLocalExpansion

    knl = LaplaceKernel(dim)
    out_kernels = [knl]
    local_expn_class = LaplaceConformingVolumeTaylorLocalExpansion
    mpole_expn_class = LaplaceConformingVolumeTaylorMultipoleExpansion
    # local_expn_class = VolumeTaylorLocalExpansion
    # mpole_expn_class = VolumeTaylorMultipoleExpansion

    exclude_self = True
    from volumential.expansion_wrangler_fpnd import (
        FPNDExpansionWranglerCodeContainer,
        FPNDExpansionWrangler)

    wcc = FPNDExpansionWranglerCodeContainer(
        ctx,
        partial(mpole_expn_class, knl),
        partial(local_expn_class, knl),
        out_kernels,
        exclude_self=exclude_self,
    )

    if exclude_self:
        # Sources and targets coincide, so target i maps to source i.
        target_to_source = np.arange(tree.ntargets, dtype=np.int32)
        self_extra_kwargs = {"target_to_source": target_to_source}
    else:
        self_extra_kwargs = {}

    wrangler = FPNDExpansionWrangler(
        code_container=wcc,
        queue=queue,
        tree=tree,
        near_field_table=nftable,
        dtype=dtype,
        fmm_level_to_order=lambda kernel, kernel_args, tree, lev: m_order,
        quad_order=q_order,
        self_extra_kwargs=self_extra_kwargs,
    )

    # }}} End sumpy expansion for laplace kernel

    return trav, wrangler, source_vals, q_weights
def demo_cost_model():
    """Calibrate the FMM cost model on small cases and predict a larger one.

    For each problem size an FMM is run with ``FMMLibExpansionWrangler``
    while recording timing data. All cases except the last are used to
    estimate calibration parameters; the calibrated model then predicts the
    per-stage cost of the last case, and the relative error against the
    measured time is logged for every stage.

    :raises NotImplementedError: if ``SUPPORTS_PROCESS_TIME`` is false.
    """
    if not SUPPORTS_PROCESS_TIME:
        raise NotImplementedError(
            "Currently this script uses process time which only works on Python>=3.3"
        )

    from boxtree import TreeBuilder
    from boxtree.cost import FMMCostModel
    from boxtree.cost import make_pde_aware_translation_cost_model
    from boxtree.fmm import drive_fmm
    from boxtree.pyfmmlib_integration import FMMLibExpansionWrangler
    from boxtree.tools import make_normal_particle_array as p_normal
    from boxtree.traversal import FMMTraversalBuilder
    from pyopencl.clrandom import PhiloxGenerator

    nsources_list = [1000, 2000, 3000, 4000, 5000]
    ntargets_list = [1000, 2000, 3000, 4000, 5000]
    dims = 3
    dtype = np.float64

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    traversals_dev = []
    level_to_orders = []
    timing_results = []

    def fmm_level_to_nterms(tree, ilevel):
        """Use the same number of expansion terms on every level."""
        return 10

    for nsources, ntargets in zip(nsources_list, ntargets_list):
        # {{{ Generate sources, targets and target_radii

        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = p_normal(queue, ntargets, dims, dtype, seed=18)

        # Re-created per case so every case draws from the same seed.
        rng = PhiloxGenerator(queue.context, seed=22)
        target_radii = rng.uniform(
            queue, ntargets, a=0, b=0.05, dtype=dtype
        ).get()

        # }}}

        # {{{ Generate tree and traversal

        tb = TreeBuilder(ctx)
        tree, _ = tb(
            queue, sources, targets=targets, target_radii=target_radii,
            stick_out_factor=0.15, max_particles_in_box=30, debug=True
        )

        tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
        trav_dev, _ = tg(queue, tree, debug=True)
        trav = trav_dev.get(queue=queue)

        traversals_dev.append(trav_dev)

        # }}}

        wrangler = FMMLibExpansionWrangler(trav.tree, 0, fmm_level_to_nterms)
        level_to_orders.append(wrangler.level_nterms)

        timing_data = {}
        src_weights = np.random.rand(tree.nsources).astype(tree.coord_dtype)
        drive_fmm(trav, wrangler, src_weights, timing_data=timing_data)

        timing_results.append(timing_data)

    # Single source of truth for which timing field is calibrated against
    # and reported below.
    time_field_name = "process_elapsed"

    cost_model = FMMCostModel(make_pde_aware_translation_cost_model)

    # Model every case except the last one, which is held out for prediction.
    model_results = [
        cost_model.cost_per_stage(
            queue, traversal, level_to_order,
            FMMCostModel.get_unit_calibration_params(),
        )
        for traversal, level_to_order in zip(
            traversals_dev[:-1], level_to_orders[:-1])
    ]
    queue.finish()

    params = cost_model.estimate_calibration_params(
        model_results, timing_results[:-1], time_field_name=time_field_name
    )

    predicted_time = cost_model.cost_per_stage(
        queue, traversals_dev[-1], level_to_orders[-1], params,
    )
    queue.finish()

    for field in ["form_multipoles", "eval_direct", "multipole_to_local",
                  "eval_multipoles", "form_locals", "eval_locals",
                  "coarsen_multipoles", "refine_locals"]:
        measured = timing_results[-1][field][time_field_name]
        predicted = predicted_time[field]

        if measured != 0:
            pred_err = (measured - predicted) / measured
            pct_err = abs(100 * pred_err)
        else:
            # A stage can finish below the timer resolution; a relative
            # error is undefined then, so report NaN instead of dividing
            # by zero.
            pct_err = float("nan")

        logger.info("actual/predicted time for %s: %.3g/%.3g -> %g %% error",
                field,
                measured,
                predicted,
                pct_err)
def build_traversal(self):
    """Return an ``FMMTraversalBuilder`` created for ``self.cl_context``."""
    from boxtree.traversal import FMMTraversalBuilder

    context = self.cl_context
    builder = FMMTraversalBuilder(context)
    return builder
def test_compare_cl_and_py_cost_model(ctx_factory, nsources, ntargets, dims, dtype):
    """Check that the OpenCL and pure-Python cost models agree stage by stage.

    A tree and traversal are built from random sources and targets with
    target radii. For every ``process_*`` stage (plus
    ``aggregate_over_boxes``) the result of ``FMMCostModel`` on the device
    traversal is compared with the result of ``_PythonFMMCostModel`` on the
    host traversal, and the wall-clock time of each call is logged.

    :arg ctx_factory: callable returning the OpenCL context to use.
    :arg nsources: number of source particles.
    :arg ntargets: number of target particles.
    :arg dims: passed to the particle generator and to the translation cost
        model factory.
    :arg dtype: dtype of the particle coordinates and target radii.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # {{{ Generate sources, targets and target_radii

    from boxtree.tools import make_normal_particle_array as p_normal
    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=18)

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=22)
    target_radii = rng.uniform(
        queue, ntargets, a=0, b=0.05, dtype=dtype
    ).get()

    # }}}

    # {{{ Generate tree and traversal

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)
    tree, _ = tb(
        queue, sources, targets=targets, target_radii=target_radii,
        stick_out_factor=0.15, max_particles_in_box=30, debug=True
    )

    from boxtree.traversal import FMMTraversalBuilder
    tg = FMMTraversalBuilder(ctx, well_sep_is_n_away=2)
    trav_dev, _ = tg(queue, tree, debug=True)
    # Host copy for the Python cost model; the CL model uses trav_dev.
    trav = trav_dev.get(queue=queue)

    # }}}

    # {{{ Construct cost models

    cl_cost_model = FMMCostModel(None)
    python_cost_model = _PythonFMMCostModel(None)

    # Unit calibration parameters, extended by an order parameter of 10 for
    # every level; used as the evaluation context for the symbolic costs.
    constant_one_params = cl_cost_model.get_unit_calibration_params().copy()
    for ilevel in range(trav.tree.nlevels):
        constant_one_params["p_fmm_lev%d" % ilevel] = 10

    xlat_cost = make_pde_aware_translation_cost_model(dims, trav.tree.nlevels)

    # }}}

    # {{{ Test process_form_multipoles

    nlevels = trav.tree.nlevels

    # Per-level P2M cost, evaluated on the host and mirrored on the device.
    p2m_cost = np.zeros(nlevels, dtype=np.float64)
    for ilevel in range(nlevels):
        p2m_cost[ilevel] = evaluate(
            xlat_cost.p2m(ilevel),
            context=constant_one_params
        )
    p2m_cost_dev = cl.array.to_device(queue, p2m_cost)

    # queue.finish() on both sides of the CL call, so that the measured
    # interval brackets the work submitted by that call.
    queue.finish()
    start_time = time.time()

    cl_form_multipoles = cl_cost_model.process_form_multipoles(
        queue, trav_dev, p2m_cost_dev
    )

    queue.finish()
    logger.info("OpenCL time for process_form_multipoles: {0}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()

    python_form_multipoles = python_cost_model.process_form_multipoles(
        queue, trav, p2m_cost
    )

    logger.info("Python time for process_form_multipoles: {0}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_form_multipoles.get(), python_form_multipoles)

    # }}}

    # {{{ Test process_coarsen_multipoles

    # One M2M cost per pair of adjacent levels (child level -> parent level).
    m2m_cost = np.zeros(nlevels - 1, dtype=np.float64)
    for target_level in range(nlevels - 1):
        m2m_cost[target_level] = evaluate(
            xlat_cost.m2m(target_level + 1, target_level),
            context=constant_one_params
        )
    m2m_cost_dev = cl.array.to_device(queue, m2m_cost)

    queue.finish()
    start_time = time.time()
    cl_coarsen_multipoles = cl_cost_model.process_coarsen_multipoles(
        queue, trav_dev, m2m_cost_dev
    )

    queue.finish()
    logger.info("OpenCL time for coarsen_multipoles: {0}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()

    python_coarsen_multipoles = python_cost_model.process_coarsen_multipoles(
        queue, trav, m2m_cost
    )

    logger.info("Python time for coarsen_multipoles: {0}".format(
        str(time.time() - start_time)
    ))

    # Compared with == rather than np.array_equal, unlike the per-box results.
    assert cl_coarsen_multipoles == python_coarsen_multipoles

    # }}}

    # {{{ Test process_direct

    queue.finish()
    start_time = time.time()

    cl_ndirect_sources_per_target_box = \
        cl_cost_model.get_ndirect_sources_per_target_box(queue, trav_dev)

    # 5.0 is the cost constant handed to both models for direct evaluation.
    cl_direct = cl_cost_model.process_direct(
        queue, trav_dev, cl_ndirect_sources_per_target_box, 5.0
    )

    queue.finish()
    logger.info("OpenCL time for process_direct: {0}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()

    python_ndirect_sources_per_target_box = \
        python_cost_model.get_ndirect_sources_per_target_box(queue, trav)

    python_direct = python_cost_model.process_direct(
        queue, trav, python_ndirect_sources_per_target_box, 5.0
    )

    logger.info("Python time for process_direct: {0}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_direct.get(), python_direct)

    # }}}

    # {{{ Test aggregate_over_boxes

    # Reuses the direct-evaluation results from the previous section.
    start_time = time.time()

    cl_direct_aggregate = cl_cost_model.aggregate_over_boxes(cl_direct)

    queue.finish()
    logger.info("OpenCL time for aggregate_over_boxes: {0}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()

    python_direct_aggregate = python_cost_model.aggregate_over_boxes(python_direct)

    logger.info("Python time for aggregate_over_boxes: {0}".format(
        str(time.time() - start_time)
    ))

    assert cl_direct_aggregate == python_direct_aggregate

    # }}}

    # {{{ Test process_list2

    nlevels = trav.tree.nlevels
    # M2L cost between boxes on the same level.
    m2l_cost = np.zeros(nlevels, dtype=np.float64)
    for ilevel in range(nlevels):
        m2l_cost[ilevel] = evaluate(
            xlat_cost.m2l(ilevel, ilevel),
            context=constant_one_params
        )
    m2l_cost_dev = cl.array.to_device(queue, m2l_cost)

    queue.finish()
    start_time = time.time()

    cl_m2l_cost = cl_cost_model.process_list2(queue, trav_dev, m2l_cost_dev)

    queue.finish()
    logger.info("OpenCL time for process_list2: {0}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()
    python_m2l_cost = python_cost_model.process_list2(queue, trav, m2l_cost)
    logger.info("Python time for process_list2: {0}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_m2l_cost.get(), python_m2l_cost)

    # }}}

    # {{{ Test process_list3

    m2p_cost = np.zeros(nlevels, dtype=np.float64)
    for ilevel in range(nlevels):
        m2p_cost[ilevel] = evaluate(
            xlat_cost.m2p(ilevel),
            context=constant_one_params
        )
    m2p_cost_dev = cl.array.to_device(queue, m2p_cost)

    queue.finish()
    start_time = time.time()

    cl_m2p_cost = cl_cost_model.process_list3(queue, trav_dev, m2p_cost_dev)

    queue.finish()
    logger.info("OpenCL time for process_list3: {0}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()
    python_m2p_cost = python_cost_model.process_list3(queue, trav, m2p_cost)
    logger.info("Python time for process_list3: {0}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_m2p_cost.get(), python_m2p_cost)

    # }}}

    # {{{ Test process_list4

    p2l_cost = np.zeros(nlevels, dtype=np.float64)
    for ilevel in range(nlevels):
        p2l_cost[ilevel] = evaluate(
            xlat_cost.p2l(ilevel),
            context=constant_one_params
        )
    p2l_cost_dev = cl.array.to_device(queue, p2l_cost)

    queue.finish()
    start_time = time.time()

    cl_p2l_cost = cl_cost_model.process_list4(queue, trav_dev, p2l_cost_dev)

    queue.finish()
    logger.info("OpenCL time for process_list4: {0}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()
    python_p2l_cost = python_cost_model.process_list4(queue, trav, p2l_cost)
    logger.info("Python time for process_list4: {0}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_p2l_cost.get(), python_p2l_cost)

    # }}}

    # {{{ Test process_refine_locals

    # One L2L cost per pair of adjacent levels (parent level -> child level).
    l2l_cost = np.zeros(nlevels - 1, dtype=np.float64)
    for ilevel in range(nlevels - 1):
        l2l_cost[ilevel] = evaluate(
            xlat_cost.l2l(ilevel, ilevel + 1),
            context=constant_one_params
        )
    l2l_cost_dev = cl.array.to_device(queue, l2l_cost)

    queue.finish()
    start_time = time.time()

    cl_refine_locals_cost = cl_cost_model.process_refine_locals(
        queue, trav_dev, l2l_cost_dev
    )

    queue.finish()
    logger.info("OpenCL time for refine_locals: {0}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()
    python_refine_locals_cost = python_cost_model.process_refine_locals(
        queue, trav, l2l_cost
    )
    logger.info("Python time for refine_locals: {0}".format(
        str(time.time() - start_time)
    ))

    assert cl_refine_locals_cost == python_refine_locals_cost

    # }}}

    # {{{ Test process_eval_locals

    l2p_cost = np.zeros(nlevels, dtype=np.float64)
    for ilevel in range(nlevels):
        l2p_cost[ilevel] = evaluate(
            xlat_cost.l2p(ilevel),
            context=constant_one_params
        )
    l2p_cost_dev = cl.array.to_device(queue, l2p_cost)

    queue.finish()
    start_time = time.time()

    cl_l2p_cost = cl_cost_model.process_eval_locals(queue, trav_dev, l2p_cost_dev)

    queue.finish()
    logger.info("OpenCL time for process_eval_locals: {0}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()
    python_l2p_cost = python_cost_model.process_eval_locals(queue, trav, l2p_cost)
    logger.info("Python time for process_eval_locals: {0}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_l2p_cost.get(), python_l2p_cost)

    # }}}
def test_sumpy_fmm_exclude_self(ctx_getter):
    """Compare a sumpy-driven FMM against direct P2P with self-exclusion on.

    Sources double as targets.  Both the FMM wrangler and the reference
    ``P2P`` evaluation are built with ``exclude_self=True`` and receive the
    same ``target_to_source`` index map.  The test passes when the relative
    l2 difference between the two potentials is within ``1e-7`` of zero.

    :arg ctx_getter: callable returning the OpenCL context to run on.
    """
    logging.basicConfig(level=logging.INFO)

    cl_ctx = ctx_getter()
    cq = cl.CommandQueue(cl_ctx)

    from functools import partial

    from boxtree import TreeBuilder
    from boxtree.fmm import drive_fmm
    from boxtree.tools import make_normal_particle_array
    from boxtree.traversal import FMMTraversalBuilder
    from pyopencl.clrandom import PhiloxGenerator
    from sumpy import P2P
    from sumpy.fmm import SumpyExpansionWranglerCodeContainer

    n_src = 500
    real_dtype = np.float64
    fmm_order = 10

    kernel = LaplaceKernel(2)
    kernels = [kernel]

    # {{{ particles, tree and traversal

    src_particles = make_normal_particle_array(
        cq, n_src, kernel.dim, real_dtype, seed=15)

    build_tree = TreeBuilder(cl_ctx)
    fmm_tree, _ = build_tree(
        cq, src_particles, max_particles_in_box=30, debug=True)

    build_traversal = FMMTraversalBuilder(cl_ctx)
    traversal, _ = build_traversal(cq, fmm_tree, debug=True)

    # }}}

    charges = PhiloxGenerator(cl_ctx).uniform(cq, n_src, dtype=np.float64)

    # Identity map: target i is the same particle as source i.
    exclude_self_kwargs = {
        "target_to_source": np.arange(fmm_tree.ntargets, dtype=np.int32),
    }

    # {{{ FMM evaluation

    def constant_order(kernel, kernel_args, tree, lev):
        # Same expansion order on every level.
        return fmm_order

    code_container = SumpyExpansionWranglerCodeContainer(
        cl_ctx,
        partial(VolumeTaylorMultipoleExpansion, kernel),
        partial(VolumeTaylorLocalExpansion, kernel),
        kernels,
        exclude_self=True)

    fmm_wrangler = code_container.get_wrangler(
        cq, fmm_tree, real_dtype,
        fmm_level_to_order=constant_order,
        self_extra_kwargs=exclude_self_kwargs)

    fmm_result, = drive_fmm(traversal, fmm_wrangler, charges)

    # }}}

    # {{{ direct reference evaluation

    direct_eval = P2P(cl_ctx, kernels, exclude_self=True)
    _, (direct_result,) = direct_eval(
        cq, src_particles, src_particles, (charges,),
        **exclude_self_kwargs)

    # }}}

    fmm_host = fmm_result.get()
    direct_host = direct_result.get()

    rel_err = la.norm(fmm_host - direct_host) / la.norm(direct_host)
    message = "order %d -> relative l2 error: %g" % (fmm_order, rel_err)
    logger.info(message)

    assert np.isclose(rel_err, 0, atol=1e-7)
def main():
    """Run a 2D Laplace volume-potential example and print its max error.

    The right-hand side is ``-(4*alpha**2*r**2 - 4*alpha) * exp(-alpha*r**2)``
    (minus the 2D Laplacian of ``exp(-alpha*r**2)``), and the computed
    potential is compared pointwise against ``exp(-alpha*r**2)`` at the
    quadrature nodes.

    Steps: build the quadrature mesh, sample the source, build the boxtree
    tree/traversal, obtain the near-field interaction table (optionally
    downloading a precomputed one with ``wget``), run
    ``drive_volume_fmm`` and report timing and the maximum absolute error.
    The ``if 0:`` sections at the end are disabled plotting/diagnostic code.

    All settings are hard-coded locals; the function takes no arguments and
    returns ``None``.
    """
    print("*************************")
    print("* Setting up...")
    print("*************************")

    dim = 2
    # download precomputation results for the 2D Laplace kernel
    download_table = True
    table_filename = "nft_laplace2d.hdf5"
    root_table_source_extent = 2

    print("Using table cache:", table_filename)

    q_order = 9  # quadrature order
    n_levels = 6  # 2^(n_levels-1) subintervals in 1D

    use_multilevel_table = False

    adaptive_mesh = False
    n_refinement_loops = 100
    refined_n_cells = 2000
    rratio_top = 0.2
    rratio_bot = 0.5

    dtype = np.float64

    m_order = 20  # multipole order
    force_direct_evaluation = False

    print("Multipole order =", m_order)

    alpha = 160

    x = pmbl.var("x")
    y = pmbl.var("y")
    expp = pmbl.var("exp")

    norm2 = x**2 + y**2
    source_expr = -(4 * alpha**2 * norm2 - 4 * alpha) * expp(-alpha * norm2)
    solu_expr = expp(-alpha * norm2)

    logger.info("Source expr: " + str(source_expr))
    logger.info("Solu expr: " + str(solu_expr))

    # bounding box
    a = -0.5
    b = 0.5

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    source_eval = Eval(dim, source_expr, [x, y])

    # {{{ generate quad points

    import volumential.meshgen as mg

    # Show meshgen info
    mg.greet()

    mesh = mg.MeshGen2D(q_order, n_levels, a, b, queue=queue)
    if not adaptive_mesh:
        mesh.print_info()
        q_points = mesh.get_q_points()
        q_weights = mesh.get_q_weights()
    else:
        iloop = -1
        while mesh.n_active_cells() < refined_n_cells:
            iloop += 1
            # NOTE(review): get_cell_centers / get_cell_measures are
            # referenced without being called, and source_eval is invoked
            # without the queue argument it receives everywhere else in this
            # function.  This branch is unreachable while adaptive_mesh is
            # False; confirm the intended call form before enabling it.
            crtr = np.abs(
                source_eval(mesh.get_cell_centers) * mesh.get_cell_measures)
            mesh.update_mesh(crtr, rratio_top, rratio_bot)
            if iloop > n_refinement_loops:
                print("Max number of refinement loops reached.")
                break

        mesh.print_info()
        q_points = mesh.get_q_points()
        q_weights = mesh.get_q_weights()

    assert len(q_points) == len(q_weights)
    assert q_points.shape[1] == dim

    # One contiguous coordinate array per axis, each moved to the device.
    q_points = np.ascontiguousarray(np.transpose(q_points))

    from pytools.obj_array import make_obj_array
    q_points = make_obj_array(
        [cl.array.to_device(queue, q_points[i]) for i in range(dim)])

    q_weights = cl.array.to_device(queue, q_weights)
    # q_radii = cl.array.to_device(queue, q_radii)

    # }}}

    # {{{ discretize the source field

    source_vals = cl.array.to_device(
        queue,
        source_eval(queue, np.array([coords.get() for coords in q_points])))

    # particle_weigt = source_val * q_weight

    # }}} End discretize the source field

    # {{{ build tree and traversals

    from boxtree.tools import AXIS_NAMES
    axis_names = AXIS_NAMES[:dim]

    from pytools import single_valued
    coord_dtype = single_valued(coord.dtype for coord in q_points)
    from boxtree.bounding_box import make_bounding_box_dtype
    bbox_type, _ = make_bounding_box_dtype(ctx.devices[0], dim, coord_dtype)
    bbox = np.empty(1, bbox_type)
    for ax in axis_names:
        bbox["min_" + ax] = a
        bbox["max_" + ax] = b

    # tune max_particles_in_box to reconstruct the mesh
    # TODO: use points from FieldPlotter are used as target points for better
    # visuals
    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)
    tree, _ = tb(
        queue,
        particles=q_points,
        targets=q_points,
        bbox=bbox,
        max_particles_in_box=q_order**2 * 4 - 1,
        kind="adaptive-level-restricted",
    )

    # NOTE(review): tree2 is never used below.  Presumably this is a smoke
    # check that the builder also accepts a plain array as bbox -- confirm,
    # or drop the second build.
    bbox2 = np.array([[a, b], [a, b]])
    tree2, _ = tb(
        queue,
        particles=q_points,
        targets=q_points,
        bbox=bbox2,
        max_particles_in_box=q_order**2 * 4 - 1,
        kind="adaptive-level-restricted",
    )

    from boxtree.traversal import FMMTraversalBuilder
    tg = FMMTraversalBuilder(ctx)
    trav, _ = tg(queue, tree)

    # }}} End build tree and traversals

    # {{{ build near field potential table

    from volumential.table_manager import NearFieldInteractionTableManager
    import os

    if download_table and (not os.path.isfile(table_filename)):
        import json
        with open("table_urls.json", 'r') as fp:
            urls = json.load(fp)

        print("Downloading table from %s" % urls['Laplace2D'])
        import subprocess
        # wget treats every non-option argument as a URL, so the destination
        # must be given through -O rather than as a trailing argument.
        status = subprocess.call(
            ["wget", "-q", "-O", table_filename, urls['Laplace2D']])
        if status != 0:
            # With -O the output file is created before the transfer starts;
            # remove it so a failed download does not leave a truncated file
            # where the table cache is expected.
            print("Download failed (wget exit status %d)" % status)
            if os.path.isfile(table_filename):
                os.remove(table_filename)

    tm = NearFieldInteractionTableManager(
        table_filename, root_extent=root_table_source_extent, queue=queue)

    if use_multilevel_table:
        # The domain width must be an integer multiple of the root table
        # extent.
        assert (abs(
            int((b - a) / root_table_source_extent) * root_table_source_extent
            - (b - a)) < 1e-15)
        nftable = []
        for lev in range(0, tree.nlevels + 1):
            print("Getting table at level", lev)
            # Named level_table so the TreeBuilder bound to tb stays intact.
            level_table, _ = tm.get_table(
                dim,
                "Laplace",
                q_order,
                source_box_level=lev,
                compute_method="DrosteSum",
                queue=queue,
                n_brick_quad_points=100,
                adaptive_level=False,
                use_symmetry=True,
                alpha=0.1,
                nlevels=15,
            )
            nftable.append(level_table)

        print("Using table list of length", len(nftable))

    else:
        nftable, _ = tm.get_table(
            dim,
            "Laplace",
            q_order,
            force_recompute=False,
            compute_method="DrosteSum",
            queue=queue,
            n_brick_quad_points=100,
            adaptive_level=False,
            use_symmetry=True,
            alpha=0.1,
            nlevels=15,
        )

    # }}} End build near field potential table

    # {{{ sumpy expansion for laplace kernel

    from sumpy.expansion import DefaultExpansionFactory
    from sumpy.kernel import LaplaceKernel

    knl = LaplaceKernel(dim)
    out_kernels = [knl]

    expn_factory = DefaultExpansionFactory()
    local_expn_class = expn_factory.get_local_expansion_class(knl)
    mpole_expn_class = expn_factory.get_multipole_expansion_class(knl)

    exclude_self = True

    from volumential.expansion_wrangler_fpnd import (
        FPNDExpansionWranglerCodeContainer, FPNDExpansionWrangler)

    wcc = FPNDExpansionWranglerCodeContainer(
        ctx,
        partial(mpole_expn_class, knl),
        partial(local_expn_class, knl),
        out_kernels,
        exclude_self=exclude_self,
    )

    if exclude_self:
        # Sources and targets are the same points: target i <-> source i.
        target_to_source = np.arange(tree.ntargets, dtype=np.int32)
        self_extra_kwargs = {"target_to_source": target_to_source}
    else:
        self_extra_kwargs = {}

    wrangler = FPNDExpansionWrangler(
        code_container=wcc,
        queue=queue,
        tree=tree,
        near_field_table=nftable,
        dtype=dtype,
        fmm_level_to_order=lambda kernel, kernel_args, tree, lev: m_order,
        quad_order=q_order,
        self_extra_kwargs=self_extra_kwargs,
    )

    # }}} End sumpy expansion for laplace kernel

    print("*************************")
    print("* Performing FMM ...")
    print("*************************")

    # {{{ conduct fmm computation

    from volumential.volume_fmm import drive_volume_fmm

    import time
    # Drain the queue on both sides so only the FMM itself is timed.
    queue.finish()

    t0 = time.time()

    pot, = drive_volume_fmm(
        trav,
        wrangler,
        source_vals * q_weights,
        source_vals,
        direct_evaluation=force_direct_evaluation,
    )
    queue.finish()

    t1 = time.time()

    print("Finished in %.2f seconds." % (t1 - t0))
    print("(%e points per second)" % (len(q_weights) / (t1 - t0)))

    # }}} End conduct fmm computation

    print("*************************")
    print("* Postprocessing ...")
    print("*************************")

    # {{{ postprocess and plot

    # print(pot)

    # Build the evaluator while x and y are still the symbolic variables;
    # they are rebound to coordinate arrays right below.
    solu_eval = Eval(dim, solu_expr, [x, y])

    x = q_points[0].get()
    y = q_points[1].get()
    ze = solu_eval(queue, np.array([x, y]))
    zs = pot.get()

    print_error = True
    if print_error:
        err = np.max(np.abs(ze - zs))
        print("Error =", err)

    # Interpolated surface
    if 0:
        h = 0.005
        out_x = np.arange(a, b + h, h)
        out_y = np.arange(a, b + h, h)
        oxx, oyy = np.meshgrid(out_x, out_y)
        out_targets = make_obj_array([
            cl.array.to_device(queue, oxx.flatten()),
            cl.array.to_device(queue, oyy.flatten()),
        ])

        from volumential.volume_fmm import interpolate_volume_potential

        # src = source_field([q.get() for q in q_points])
        # src = cl.array.to_device(queue, src)
        interp_pot = interpolate_volume_potential(
            out_targets, trav, wrangler, pot)
        opot = interp_pot.get()

        import matplotlib.pyplot as plt
        from mpl_toolkits.mplot3d import Axes3D

        plt3d = plt.figure()
        ax = Axes3D(plt3d)  # noqa
        surf = ax.plot_surface(oxx, oyy, opot.reshape(oxx.shape))  # noqa
        # ax.scatter(x, y, src.get())
        # ax.set_zlim(-0.25, 0.25)

        plt.draw()
        plt.show()

    # Boxtree
    if 0:
        import matplotlib.pyplot as plt

        if dim == 2:
            # plt.plot(q_points[0].get(), q_points[1].get(), ".")
            pass

        from boxtree.visualization import TreePlotter

        plotter = TreePlotter(tree.get(queue=queue))
        plotter.draw_tree(fill=False, edgecolor="black")
        # plotter.draw_box_numbers()
        plotter.set_bounding_box()
        plt.gca().set_aspect("equal")

        plt.draw()
        # plt.show()
        plt.savefig("tree.png")

    # Direct p2p
    if 0:
        print("Performing P2P")
        pot_direct, = drive_volume_fmm(
            trav,
            wrangler,
            source_vals * q_weights,
            source_vals,
            direct_evaluation=True)
        zds = pot_direct.get()
        zs = pot.get()

        print("P2P-FMM diff =", np.max(np.abs(zs - zds)))

        print("P2P Error =", np.max(np.abs(ze - zds)))

        """
        import matplotlib.pyplot as plt
        import matplotlib.cm as cm
        x = q_points[0].get()
        y = q_points[1].get()
        plt.scatter(x, y, c=np.log(abs(zs-zds)) / np.log(10), cmap=cm.jet)
        plt.colorbar()

        plt.xlabel("Multipole order = " + str(m_order))

        plt.draw()
        plt.show()
        """

    # Scatter plot
    if 0:
        import matplotlib.pyplot as plt
        from mpl_toolkits.mplot3d import Axes3D

        x = q_points[0].get()
        y = q_points[1].get()
        ze = solu_eval(queue, np.array([x, y]))
        zs = pot.get()

        plt3d = plt.figure()
        ax = Axes3D(plt3d)
        ax.scatter(x, y, zs, s=1)
        # ax.scatter(x, y, source_field([q.get() for q in q_points]), s=1)
        # import matplotlib.cm as cm
        # ax.scatter(x, y, zs, c=np.log(abs(zs-zds)), cmap=cm.jet)
        # plt.gca().set_aspect("equal")

        # ax.set_xlim3d([-1, 1])
        # ax.set_ylim3d([-1, 1])
        # ax.set_zlim3d([np.min(z), np.max(z)])
        # ax.set_zlim3d([-0.002, 0.00])

        plt.draw()
        plt.show()
def drive_test_completeness(ctx, queue, dim, q_order):
    """Check near-field direct evaluation with a constant kernel table.

    Builds a uniform quadrature grid on ``[-1, 1]**dim``, uses the constant
    source ``1``, computes a fresh ``"Constant"`` near-field table and runs
    only the wrangler's ``eval_direct`` over the neighbor source boxes.  Each
    resulting value is asserted to equal ``2**dim`` to within ``1e-8``.

    :arg ctx: OpenCL context used for the tree, traversal and wrangler.
    :arg queue: OpenCL command queue used for transfers and evaluation.
    :arg dim: spatial dimension of the grid and kernel.
    :arg q_order: quadrature order, also passed to the table and wrangler.
    :raises AssertionError: if a grid-shape check or a potential value check
        fails.
    """
    import os

    n_levels = 2  # 2^(n_levels-1) subintervals in 1D, must be at least 2

    # bounding box
    a = -1
    b = 1

    dtype = np.float64

    def source_field(x):
        assert len(x) == dim
        return 1

    # {{{ generate quad points

    import volumential.meshgen as mg
    q_points, q_weights, q_radii = mg.make_uniform_cubic_grid(
        degree=q_order, level=n_levels, dim=dim)

    assert len(q_points) == len(q_weights)
    assert q_points.shape[1] == dim

    # Keep the host-side point-major layout for sampling the source below.
    q_points_org = q_points
    q_points = np.ascontiguousarray(np.transpose(q_points))

    from pytools.obj_array import make_obj_array
    q_points = make_obj_array(
        [cl.array.to_device(queue, q_points[i]) for i in range(dim)])

    q_weights = cl.array.to_device(queue, q_weights)
    if q_radii is not None:
        q_radii = cl.array.to_device(queue, q_radii)

    # }}}

    # {{{ discretize the source field

    source_vals = cl.array.to_device(
        queue, np.array([source_field(qp) for qp in q_points_org]))

    # }}} End discretize the source field

    # {{{ build tree and traversals

    from boxtree.tools import AXIS_NAMES
    axis_names = AXIS_NAMES[:dim]

    from pytools import single_valued
    coord_dtype = single_valued(coord.dtype for coord in q_points)
    from boxtree.bounding_box import make_bounding_box_dtype
    bbox_type, _ = make_bounding_box_dtype(ctx.devices[0], dim, coord_dtype)
    bbox = np.empty(1, bbox_type)
    for ax in axis_names:
        bbox["min_" + ax] = a
        bbox["max_" + ax] = b

    # tune max_particles_in_box to reconstruct the mesh
    # TODO: use points from FieldPlotter are used as target points for better
    # visuals
    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)
    tree, _ = tb(
        queue,
        particles=q_points,
        targets=q_points,
        bbox=bbox,
        max_particles_in_box=q_order**dim * (2**dim) - 1,
        kind="adaptive-level-restricted",
    )

    from boxtree.traversal import FMMTraversalBuilder
    tg = FMMTraversalBuilder(ctx)
    trav, _ = tg(queue, tree)

    # }}} End build tree and traversals

    from volumential.table_manager import NearFieldInteractionTableManager

    # Start from a clean slate so the table is recomputed rather than read
    # from a previous run.  os.remove avoids spawning an external `rm`,
    # which is not available on every platform.
    try:
        os.remove('nft-test-completeness.hdf5')
    except FileNotFoundError:
        pass

    with NearFieldInteractionTableManager("nft-test-completeness.hdf5",
                                          progress_bar=False) as tm:

        nft, _ = tm.get_table(
            dim,
            "Constant",
            q_order,
            queue=queue,
            n_levels=1,
            alpha=0,
            compute_method="DrosteSum",
            n_brick_quad_points=50,
            adaptive_level=False,
            use_symmetry=True,
        )

    # {{{ expansion wrangler

    from sumpy.kernel import LaplaceKernel
    from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansion
    from sumpy.expansion.local import VolumeTaylorLocalExpansion

    knl = LaplaceKernel(dim)
    out_kernels = [knl]
    local_expn_class = VolumeTaylorLocalExpansion
    mpole_expn_class = VolumeTaylorMultipoleExpansion

    from volumential.expansion_wrangler_fpnd import (
        FPNDExpansionWranglerCodeContainer, FPNDExpansionWrangler)

    wcc = FPNDExpansionWranglerCodeContainer(
        ctx,
        partial(mpole_expn_class, knl),
        partial(local_expn_class, knl),
        out_kernels,
        exclude_self=True,
    )

    wrangler = FPNDExpansionWrangler(
        code_container=wcc,
        queue=queue,
        tree=tree,
        near_field_table=nft,
        dtype=dtype,
        fmm_level_to_order=lambda kernel, kernel_args, tree, lev: 1,
        quad_order=q_order,
    )

    # }}} End sumpy expansion for laplace kernel

    pot = wrangler.eval_direct(
        trav.target_boxes,
        trav.neighbor_source_boxes_starts,
        trav.neighbor_source_boxes_lists,
        mode_coefs=source_vals)
    # NOTE(review): the first element of the return value is taken here and
    # its first entry is iterated below -- presumably (potentials, extra)
    # with one potential per output kernel; confirm against eval_direct.
    pot = pot[0]
    for p in pot[0]:
        assert (abs(p - 2**dim) < 1e-8)
def test_from_sep_siblings_rotation_classes(ctx_factory, well_sep_is_n_away):
    """Validate the rotation classes attached to ``from_sep_siblings``.

    A 3D tree and traversal are built from normally distributed particles,
    then ``RotationClassesBuilder`` is run on them.  For every entry of
    ``from_sep_siblings`` the angle between the source-to-target translation
    vector and the z-axis is recomputed from the box centers and compared
    with the angle stored for that entry's rotation class.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    :arg well_sep_is_n_away: forwarded to ``FMMTraversalBuilder``.
    """
    cl_ctx = ctx_factory()
    cq = cl.CommandQueue(cl_ctx)

    ndim = 3
    npts = 10**4
    real_dtype = np.float64

    # {{{ build tree

    from pyopencl.clrandom import PhiloxGenerator
    generator = PhiloxGenerator(cq.context, seed=15)

    from pytools.obj_array import make_obj_array
    coords = []
    for _ in range(ndim):
        coords.append(generator.normal(cq, npts, dtype=real_dtype))
    points = make_obj_array(coords)

    from boxtree import TreeBuilder
    build_tree = TreeBuilder(cl_ctx)

    cq.finish()
    dev_tree, _ = build_tree(cq, points, max_particles_in_box=30, debug=True)

    # }}}

    # {{{ build traversal

    from boxtree.traversal import FMMTraversalBuilder
    from boxtree.rotation_classes import RotationClassesBuilder

    build_trav = FMMTraversalBuilder(
        cl_ctx, well_sep_is_n_away=well_sep_is_n_away)
    dev_trav, _ = build_trav(cq, dev_tree)

    build_rot_classes = RotationClassesBuilder(cl_ctx)
    rot_info, _ = build_rot_classes(cq, dev_trav, dev_tree)

    class_of_entry = rot_info.from_sep_siblings_rotation_classes.get(cq)
    angle_of_class = rot_info.from_sep_siblings_rotation_class_to_angle.get(cq)

    host_tree = dev_tree.get(queue=cq)
    host_trav = dev_trav.get(queue=cq)

    # One row per box after the transpose.
    box_centers = host_tree.box_centers.T

    # }}}

    # For each entry of from_sep_siblings, compute the source-target
    # translation direction as a vector, and check that the from_sep_siblings
    # rotation class in the traversal corresponds to the angle with the
    # z-axis of the translation direction.
    list_starts = host_trav.from_sep_siblings_starts
    list_entries = host_trav.from_sep_siblings_lists

    for ibox, target_box in enumerate(
            host_trav.target_or_target_parent_boxes):
        lo = list_starts[ibox]
        hi = list_starts[ibox + 1]
        source_boxes = list_entries[lo:hi]

        shifts = box_centers[target_box] - box_centers[source_boxes]
        in_plane_len = la.norm(shifts[:, :-1], axis=1)
        z_component = shifts[:, -1]
        expected_angles = np.arctan2(in_plane_len, z_component)

        stored_angles = angle_of_class[class_of_entry[lo:hi]]

        assert np.allclose(
            expected_angles, stored_angles, atol=1e-13, rtol=1e-13)