示例#1
0
def test_random(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import RanluxGenerator

    if has_double_support(context.devices[0]):
        dtypes = [np.float32, np.float64]
    else:
        dtypes = [np.float32]

    gen = RanluxGenerator(queue, 5120)

    for ary_size in [300, 301, 302, 303, 10007]:
        for dtype in dtypes:
            ran = cl_array.zeros(queue, ary_size, dtype)
            gen.fill_uniform(ran)
            assert (0 < ran.get()).all()
            assert (ran.get() < 1).all()

            gen.synchronize(queue)

            ran = cl_array.zeros(queue, ary_size, dtype)
            gen.fill_uniform(ran, a=4, b=7)
            assert (4 < ran.get()).all()
            assert (ran.get() < 7).all()

            ran = gen.normal(queue, (10007,), dtype, mu=4, sigma=3)

    dtypes = [np.int32]
    for dtype in dtypes:
        ran = gen.uniform(queue, (10000007,), dtype, a=200, b=300)
        assert (200 <= ran.get()).all()
        assert (ran.get() < 300).all()
示例#2
0
def test_random_int_in_range(ctx_factory, dtype):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import RanluxGenerator
    gen = RanluxGenerator(queue, 5120)

    if (dtype == np.int64
            and context.devices[0].platform.vendor.startswith("Advanced Micro")):
        pytest.xfail("AMD miscompiles 64-bit RNG math")

    ran = gen.uniform(queue, (10000007,), dtype, a=200, b=300)
    assert (200 <= ran.get()).all()
    assert (ran.get() < 300).all()
示例#3
0
def test_sort(ctx_factory, scan_kernel):
    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dtype = np.int32

    from pyopencl.algorithm import RadixSort
    sort = RadixSort(context,
                     "int *ary",
                     key_expr="ary[i]",
                     sort_arg_names=["ary"],
                     scan_kernel=scan_kernel)

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=15)

    from time import time

    # intermediate arrays for largest size cause out-of-memory on low-end GPUs
    for n in scan_test_counts[:-1]:
        if n >= 2000 and isinstance(scan_kernel, GenericDebugScanKernel):
            continue

        print(n)

        print("  rng")
        a_dev = rng.uniform(queue, (n, ), dtype=dtype, a=0, b=2**16)
        a = a_dev.get()

        dev_start = time()
        print("  device")
        (a_dev_sorted, ), evt = sort(a_dev, key_bits=16)
        queue.finish()
        dev_end = time()
        print("  numpy")
        a_sorted = np.sort(a)
        numpy_end = time()

        numpy_elapsed = numpy_end - dev_end
        dev_elapsed = dev_end - dev_start
        print("  dev: %.2f MKeys/s numpy: %.2f MKeys/s ratio: %.2fx" %
              (1e-6 * n / dev_elapsed, 1e-6 * n / numpy_elapsed,
               numpy_elapsed / dev_elapsed))
        assert (a_dev_sorted.get() == a_sorted).all()
示例#4
0
def test_sort(ctx_factory):
    from pytest import importorskip

    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dtype = np.int32

    from pyopencl.algorithm import RadixSort

    sort = RadixSort(context, "int *ary", key_expr="ary[i]", sort_arg_names=["ary"])

    from pyopencl.clrandom import RanluxGenerator

    rng = RanluxGenerator(queue, seed=15)

    from time import time

    # intermediate arrays for largest size cause out-of-memory on low-end GPUs
    for n in scan_test_counts[:-1]:
        print(n)

        print("  rng")
        a_dev = rng.uniform(queue, (n,), dtype=dtype, a=0, b=2 ** 16)
        a = a_dev.get()

        dev_start = time()
        print("  device")
        (a_dev_sorted,), evt = sort(a_dev, key_bits=16)
        queue.finish()
        dev_end = time()
        print("  numpy")
        a_sorted = np.sort(a)
        numpy_end = time()

        numpy_elapsed = numpy_end - dev_end
        dev_elapsed = dev_end - dev_start
        print(
            "  dev: %.2f MKeys/s numpy: %.2f MKeys/s ratio: %.2fx"
            % (1e-6 * n / dev_elapsed, 1e-6 * n / numpy_elapsed, numpy_elapsed / dev_elapsed)
        )
        assert (a_dev_sorted.get() == a_sorted).all()
示例#5
0
def test_sort(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dtype = np.int32

    from pyopencl.algorithm import RadixSort
    sort = RadixSort(context, "int *ary", key_expr="ary[i]",
            sort_arg_names=["ary"])

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=15)

    from time import time

    for n in scan_test_counts:
        print(n)

        print("  rng")
        a_dev = rng.uniform(queue, (n,), dtype=dtype, a=0, b=2**16)
        a = a_dev.get()

        dev_start = time()
        print("  device")
        a_dev_sorted, = sort(a_dev, key_bits=16)
        queue.finish()
        dev_end = time()
        print("  numpy")
        a_sorted = np.sort(a)
        numpy_end = time()

        numpy_elapsed = numpy_end-dev_end
        dev_elapsed = dev_end-dev_start
        print ("  dev: %.2f MKeys/s numpy: %.2f MKeys/s ratio: %.2fx" % (
                1e-6*n/dev_elapsed, 1e-6*n/numpy_elapsed, numpy_elapsed/dev_elapsed))
        assert (a_dev_sorted.get() == a_sorted).all()
示例#6
0
def test_pyfmmlib_fmm(ctx_getter):
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip

    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 3000
    ntargets = 1000
    dims = 2
    dtype = np.float64

    helmholtz_k = 2

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = p_normal(queue, ntargets, dims, dtype, seed=18) + np.array([2, 0])

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder

    tb = TreeBuilder(ctx)

    tree, _ = tb(queue, sources, targets=targets, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder

    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    from pyopencl.clrandom import RanluxGenerator

    rng = RanluxGenerator(queue, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()
    # weights = np.ones(nsources)

    logger.info("computing direct (reference) result")

    from pyfmmlib import hpotgrad2dall_vec

    ref_pot, _, _ = hpotgrad2dall_vec(
        ifgrad=False, ifhess=False, sources=sources_host.T, charge=weights, targets=targets_host.T, zk=helmholtz_k
    )

    from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler

    wrangler = Helmholtz2DExpansionWrangler(trav.tree, helmholtz_k, nterms=10)

    from boxtree.fmm import drive_fmm

    pot = drive_fmm(trav, wrangler, weights)

    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
    logger.info("relative l2 error: %g" % rel_err)
    assert rel_err < 1e-5
示例#7
0
def test_fmm_completeness(
    ctx_getter, dims, nsources_req, ntargets_req, who_has_extent, source_gen, target_gen, filter_kind
):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array " "generation")

    from pyopencl.clrandom import RanluxGenerator

    rng = RanluxGenerator(queue, seed=13)
    if sources_have_extent:
        source_radii = 2 ** rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2 ** rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder

    tb = TreeBuilder(ctx)

    tree, _ = tb(
        queue,
        sources,
        targets=targets,
        max_particles_in_box=30,
        source_radii=source_radii,
        target_radii=target_radii,
        debug=True,
    )
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt

        pt.show()

    from boxtree.traversal import FMMTraversalBuilder

    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)
    if trav.sep_close_smaller_starts is not None:
        trav = trav.merge_close_lists(queue)

    weights = np.random.randn(nsources)
    # weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if filter_kind:
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2).astype(np.int8)
        if filter_kind == "user":
            from boxtree.tree import filter_target_lists_in_user_order

            filtered_targets = filter_target_lists_in_user_order(queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                host_tree, filtered_targets.get(queue=queue)
            )
        elif filter_kind == "tree":
            from boxtree.tree import filter_target_lists_in_tree_order

            filtered_targets = filter_target_lists_in_tree_order(queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                host_tree, filtered_targets.get(queue=queue)
            )
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm

    pot = drive_fmm(host_trav, wrangler, weights)

    # {{{ build, evaluate matrix (and identify missing interactions)

    if 0:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 1:
            pt.spy(mat)
            pt.show()

        missing_tgts, missing_srcs = np.where(mat == 0)

        if 1 and len(missing_tgts):
            from boxtree.visualization import TreePlotter

            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_missing_tgts = host_tree.indices_to_tree_target_order(missing_tgts)
            tree_order_missing_srcs = host_tree.indices_to_tree_source_order(missing_srcs)

            src_boxes = [host_tree.find_box_nr_for_source(i) for i in tree_order_missing_srcs]
            tgt_boxes = [host_tree.find_box_nr_for_target(i) for i in tree_order_missing_tgts]
            print(src_boxes)
            print(tgt_boxes)

            pt.plot(host_tree.targets[0][tree_order_missing_tgts], host_tree.targets[1][tree_order_missing_tgts], "rv")
            pt.plot(host_tree.sources[0][tree_order_missing_srcs], host_tree.sources[1][tree_order_missing_srcs], "go")
            pt.gca().set_aspect("equal")

            pt.show()

    # }}}

    if filter_kind:
        pot = pot[flags.get() > 0]

    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8
    if 0 and not good:
        import matplotlib.pyplot as pt

        pt.plot(pot - weights_sum)
        pt.show()

    if 0 and not good:
        import matplotlib.pyplot as pt

        filt_targets = [host_tree.targets[0][flags.get() > 0], host_tree.targets[1][flags.get() > 0]]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [filt_targets[0][bad], filt_targets[1][bad]]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
示例#8
0
def test_pyfmmlib_fmm(ctx_getter):
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 3000
    ntargets = 1000
    dims = 2
    dtype = np.float64

    helmholtz_k = 2

    sources = p_normal(queue, nsources, dims, dtype, seed=15)
    targets = (p_normal(queue, ntargets, dims, dtype, seed=18) +
               np.array([2, 0]))

    sources_host = particle_array_to_host(sources)
    targets_host = particle_array_to_host(targets)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 debug=True)

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)

    trav = trav.get(queue=queue)

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=20)

    weights = rng.uniform(queue, nsources, dtype=np.float64).get()
    #weights = np.ones(nsources)

    logger.info("computing direct (reference) result")

    from pyfmmlib import hpotgrad2dall_vec
    ref_pot, _, _ = hpotgrad2dall_vec(ifgrad=False,
                                      ifhess=False,
                                      sources=sources_host.T,
                                      charge=weights,
                                      targets=targets_host.T,
                                      zk=helmholtz_k)

    from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler
    wrangler = Helmholtz2DExpansionWrangler(trav.tree, helmholtz_k, nterms=10)

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(trav, wrangler, weights)

    rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
    logger.info("relative l2 error: %g" % rel_err)
    assert rel_err < 1e-5
示例#9
0
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
                          who_has_extent, source_gen, target_gen, filter_kind):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                    "generation")

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=13)
    if sources_have_extent:
        source_radii = 2**rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 source_radii=source_radii,
                 target_radii=target_radii,
                 debug=True)
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)
    if trav.sep_close_smaller_starts is not None:
        trav = trav.merge_close_lists(queue)

    weights = np.random.randn(nsources)
    #weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if filter_kind:
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
                .astype(np.int8)
        if filter_kind == "user":
            from boxtree.tree import filter_target_lists_in_user_order
            filtered_targets = filter_target_lists_in_user_order(
                queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            from boxtree.tree import filter_target_lists_in_tree_order
            filtered_targets = filter_target_lists_in_tree_order(
                queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
            wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    # {{{ build, evaluate matrix (and identify missing interactions)

    if 0:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 1:
            pt.spy(mat)
            pt.show()

        missing_tgts, missing_srcs = np.where(mat == 0)

        if 1 and len(missing_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_missing_tgts = \
                    host_tree.indices_to_tree_target_order(missing_tgts)
            tree_order_missing_srcs = \
                    host_tree.indices_to_tree_source_order(missing_srcs)

            src_boxes = [
                host_tree.find_box_nr_for_source(i)
                for i in tree_order_missing_srcs
            ]
            tgt_boxes = [
                host_tree.find_box_nr_for_target(i)
                for i in tree_order_missing_tgts
            ]
            print(src_boxes)
            print(tgt_boxes)

            pt.plot(host_tree.targets[0][tree_order_missing_tgts],
                    host_tree.targets[1][tree_order_missing_tgts], "rv")
            pt.plot(host_tree.sources[0][tree_order_missing_srcs],
                    host_tree.sources[1][tree_order_missing_srcs], "go")
            pt.gca().set_aspect("equal")

            pt.show()

    # }}}

    if filter_kind:
        pot = pot[flags.get() > 0]

    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
            host_tree.targets[0][flags.get() > 0],
            host_tree.targets[1][flags.get() > 0],
        ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
            filt_targets[0][bad],
            filt_targets[1][bad],
        ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
示例#10
0
def test_extent_tree(ctx_getter, dims, do_plot=False):
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 100000
    ntargets = 200000
    dtype = np.float64
    npoint_sources_per_source = 16

    sources = make_normal_particle_array(queue, nsources, dims, dtype,
            seed=12)
    targets = make_normal_particle_array(queue, ntargets, dims, dtype,
            seed=19)

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=13)
    source_radii = 2**rng.uniform(queue, nsources, dtype=dtype,
            a=-10, b=0)
    target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype,
            a=-10, b=0)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    queue.finish()
    dev_tree, _ = tb(queue, sources, targets=targets,
            source_radii=source_radii, target_radii=target_radii,
            max_particles_in_box=10, debug=True)

    logger.info("transfer tree, check orderings")

    tree = dev_tree.get(queue=queue)

    sorted_sources = np.array(list(tree.sources))
    sorted_targets = np.array(list(tree.targets))
    sorted_source_radii = tree.source_radii
    sorted_target_radii = tree.target_radii

    unsorted_sources = np.array([pi.get() for pi in sources])
    unsorted_targets = np.array([pi.get() for pi in targets])
    unsorted_source_radii = source_radii.get()
    unsorted_target_radii = target_radii.get()
    assert (sorted_sources
            == unsorted_sources[:, tree.user_source_ids]).all()
    assert (sorted_source_radii
            == unsorted_source_radii[tree.user_source_ids]).all()

    # {{{ test box structure, stick-out criterion

    logger.info("test box structure, stick-out criterion")

    user_target_ids = np.empty(tree.ntargets, dtype=np.intp)
    user_target_ids[tree.sorted_target_ids] = np.arange(tree.ntargets, dtype=np.intp)
    if ntargets:
        assert (sorted_targets
                == unsorted_targets[:, user_target_ids]).all()
        assert (sorted_target_radii
                == unsorted_target_radii[user_target_ids]).all()

    all_good_so_far = True

    # {{{ check sources, targets

    for ibox in range(tree.nboxes):
        extent_low, extent_high = tree.get_box_extent(ibox)

        box_radius = np.max(extent_high-extent_low) * 0.5
        stick_out_dist = tree.stick_out_factor * box_radius

        assert (extent_low >=
                tree.bounding_box[0] - 1e-12*tree.root_extent).all(), ibox
        assert (extent_high <=
                tree.bounding_box[1] + 1e-12*tree.root_extent).all(), ibox

        box_children = tree.box_child_ids[:, ibox]
        existing_children = box_children[box_children != 0]

        assert (tree.box_source_counts_nonchild[ibox]
                + np.sum(tree.box_source_counts_cumul[existing_children])
                == tree.box_source_counts_cumul[ibox])
        assert (tree.box_target_counts_nonchild[ibox]
                + np.sum(tree.box_target_counts_cumul[existing_children])
                == tree.box_target_counts_cumul[ibox])

        for what, starts, counts, points, radii in [
                ("source", tree.box_source_starts, tree.box_source_counts_cumul,
                    sorted_sources, sorted_source_radii),
                ("target", tree.box_target_starts, tree.box_target_counts_cumul,
                    sorted_targets, sorted_target_radii),
                ]:
            bstart = starts[ibox]
            bslice = slice(bstart, bstart+counts[ibox])
            check_particles = points[:, bslice]
            check_radii = radii[bslice]

            good = (
                    (check_particles + check_radii
                        < extent_high[:, np.newaxis] + stick_out_dist)
                    &
                    (extent_low[:, np.newaxis] - stick_out_dist
                        <= check_particles - check_radii)
                    ).all(axis=0)

            all_good_here = good.all()

            if not all_good_here:
                print("BAD BOX %s %d level %d" % (what, ibox, tree.box_levels[ibox]))

            all_good_so_far = all_good_so_far and all_good_here
            assert all_good_here

    # }}}

    assert all_good_so_far

    # }}}

    # {{{ create, link point sources

    logger.info("creating point sources")

    np.random.seed(20)

    from pytools.obj_array import make_obj_array
    point_sources = make_obj_array([
            cl.array.to_device(queue,
                unsorted_sources[i][:, np.newaxis]
                + unsorted_source_radii[:, np.newaxis]
                * np.random.uniform(
                    -1, 1, size=(nsources, npoint_sources_per_source))
                 )
            for i in range(dims)])

    point_source_starts = cl.array.arange(queue,
            0, (nsources+1)*npoint_sources_per_source, npoint_sources_per_source,
            dtype=tree.particle_id_dtype)

    from boxtree.tree import link_point_sources
    dev_tree = link_point_sources(queue, dev_tree,
            point_source_starts, point_sources,
            debug=True)