示例#1
0
def test_cost_model_order_varying_by_level(ctx_factory):
    """For FMM order varying by level, this checks to ensure that the costs are
    different. The varying-level case should have larger cost.
    """

    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    # {{{ constant level to order

    def level_to_order_constant(kernel, kernel_args, tree, level):
        return 1

    lpot_source = get_lpot_source(actx, 2).copy(
            cost_model=QBXCostModel(),
            fmm_level_to_order=level_to_order_constant)
    places = GeometryCollection(lpot_source)

    density_discr = places.get_discretization(places.auto_source.geometry)
    sigma_sym = sym.var("sigma")

    k_sym = LaplaceKernel(2)
    sym_op = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)

    sigma = get_density(actx, density_discr)

    cost_constant, metadata = bind(places, sym_op).cost_per_stage(
            "constant_one", sigma=sigma)

    cost_constant = one(cost_constant.values())
    metadata = one(metadata.values())

    # }}}

    # {{{ varying level to order

    def level_to_order_varying(kernel, kernel_args, tree, level):
        return metadata["nlevels"] - level

    lpot_source = get_lpot_source(actx, 2).copy(
            cost_model=QBXCostModel(),
            fmm_level_to_order=level_to_order_varying)
    places = GeometryCollection(lpot_source)

    density_discr = places.get_discretization(places.auto_source.geometry)

    sigma = get_density(actx, density_discr)

    cost_varying, _ = bind(lpot_source, sym_op).cost_per_stage(
        "constant_one", sigma=sigma)

    cost_varying = one(cost_varying.values())

    # }}}

    assert sum(cost_varying.values()) > sum(cost_constant.values())
示例#2
0
def calibrate_cost_model(ctx):
    queue = cl.CommandQueue(ctx)
    actx = PyOpenCLArrayContext(queue, force_device_scalars=True)
    cost_model = QBXCostModel()

    model_results = []
    timing_results = []

    for lpot_source in training_geometries(actx):
        lpot_source = lpot_source.copy(cost_model=cost_model)

        from pytential import GeometryCollection
        places = GeometryCollection(lpot_source)
        density_discr = places.get_discretization(places.auto_source.geometry)

        bound_op = get_bound_op(places)
        sigma = get_test_density(actx, density_discr)

        modeled_cost, _ = bound_op.cost_per_stage("constant_one", sigma=sigma)

        # Warm-up run.
        bound_op.eval({"sigma": sigma}, array_context=actx)

        for _ in range(RUNS):
            timing_data = {}
            bound_op.eval({"sigma": sigma},
                          array_context=actx,
                          timing_data=timing_data)

            model_results.append(modeled_cost)
            timing_results.append(timing_data)

    calibration_params = cost_model.estimate_kernel_specific_calibration_params(
        model_results, timing_results, time_field_name="process_elapsed")

    return calibration_params
示例#3
0
def test_cost_model(ctx, calibration_params):
    queue = cl.CommandQueue(ctx)
    actx = PyOpenCLArrayContext(queue, force_device_scalars=True)
    cost_model = QBXCostModel()

    for lpot_source in test_geometries(actx):
        lpot_source = lpot_source.copy(cost_model=cost_model)

        from pytential import GeometryCollection
        places = GeometryCollection(lpot_source)
        density_discr = places.get_discretization(places.auto_source.geometry)

        bound_op = get_bound_op(places)
        sigma = get_test_density(actx, density_discr)

        cost_S, _ = bound_op.cost_per_stage(calibration_params, sigma=sigma)
        model_result = one(cost_S.values())

        # Warm-up run.
        bound_op.eval({"sigma": sigma}, array_context=actx)

        temp_timing_results = []
        for _ in range(RUNS):
            timing_data = {}
            bound_op.eval({"sigma": sigma},
                          array_context=actx,
                          timing_data=timing_data)
            temp_timing_results.append(one(timing_data.values()))

        timing_result = {}
        for param in model_result:
            timing_result[param] = (sum(
                temp_timing_result[param]["process_elapsed"]
                for temp_timing_result in temp_timing_results)) / RUNS

        from pytools import Table
        table = Table()
        table.add_row(["stage", "actual (s)", "predicted (s)"])
        for stage in model_result:
            row = [
                stage,
                f"{timing_result[stage]:.2f}",
                f"{model_result[stage]:.2f}",
            ]
            table.add_row(row)

        print(table)
示例#4
0
def test_cost_model(ctx_factory, dim, use_target_specific_qbx, per_box):
    """Test that cost model gathering can execute successfully."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    lpot_source = get_lpot_source(actx, dim).copy(
            _use_target_specific_qbx=use_target_specific_qbx,
            cost_model=QBXCostModel())
    places = GeometryCollection(lpot_source)

    density_discr = places.get_discretization(places.auto_source.geometry)
    sigma = get_density(actx, density_discr)

    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(lpot_source.ambient_dim)

    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
    op_S = bind(places, sym_op_S)

    if per_box:
        cost_S, _ = op_S.cost_per_box("constant_one", sigma=sigma)
    else:
        cost_S, _ = op_S.cost_per_stage("constant_one", sigma=sigma)

    assert len(cost_S) == 1

    sym_op_S_plus_D = (
            sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
            + sym.D(k_sym, sigma_sym, qbx_forced_limit="avg"))
    op_S_plus_D = bind(places, sym_op_S_plus_D)

    if per_box:
        cost_S_plus_D, _ = op_S_plus_D.cost_per_box(
            "constant_one", sigma=sigma
        )
    else:
        cost_S_plus_D, _ = op_S_plus_D.cost_per_stage(
            "constant_one", sigma=sigma
        )

    assert len(cost_S_plus_D) == 2
示例#5
0
    def __init__(
            self,
            density_discr,
            fine_order,
            qbx_order=None,
            fmm_order=None,
            fmm_level_to_order=None,
            expansion_factory=None,
            target_association_tolerance=_not_provided,

            # begin experimental arguments
            # FIXME default debug=False once everything has matured
            debug=True,
            _disable_refinement=False,
            _expansions_in_tree_have_extent=True,
            _expansion_stick_out_factor=0.5,
            _well_sep_is_n_away=2,
            _max_leaf_refine_weight=None,
            _box_extent_norm=None,
            _from_sep_smaller_crit=None,
            _from_sep_smaller_min_nsources_cumul=None,
            _tree_kind="adaptive",
            _use_target_specific_qbx=None,
            geometry_data_inspector=None,
            cost_model=None,
            fmm_backend="sumpy",
            target_stick_out_factor=_not_provided):
        """
        :arg fine_order: The total degree to which the (upsampled)
             underlying quadrature is exact.
        :arg fmm_order: `False` for direct calculation. May not be given if
            *fmm_level_to_order* is given.
        :arg fmm_level_to_order: A function that takes arguments of
             *(kernel, kernel_args, tree, level)* and returns the expansion
             order to be used on a given *level* of *tree* with *kernel*, where
             *kernel* is the :class:`sumpy.kernel.Kernel` being evaluated, and
             *kernel_args* is a set of *(key, value)* tuples with evaluated
             kernel arguments. May not be given if *fmm_order* is given.

        Experimental arguments without a promise of forward compatibility:

        :arg _use_target_specific_qbx: Whether to use target-specific
            acceleration by default if possible. *None* means
            "use if possible".
        :arg cost_model: Either *None* or an object implementing the
             :class:`~pytential.qbx.cost.AbstractQBXCostModel` interface, used for
             gathering modeled costs if provided (experimental)
        """

        # {{{ argument processing

        if fine_order is None:
            raise ValueError("fine_order must be provided.")

        if qbx_order is None:
            raise ValueError("qbx_order must be provided.")

        if target_stick_out_factor is not _not_provided:
            from warnings import warn
            warn(
                "target_stick_out_factor has been renamed to "
                "target_association_tolerance. "
                "Using target_stick_out_factor is deprecated "
                "and will stop working in 2018.",
                DeprecationWarning,
                stacklevel=2)

            if target_association_tolerance is not _not_provided:
                raise TypeError(
                    "May not pass both target_association_tolerance and "
                    "target_stick_out_factor.")

            target_association_tolerance = target_stick_out_factor

        del target_stick_out_factor

        if target_association_tolerance is _not_provided:
            target_association_tolerance = float(
                np.finfo(density_discr.real_dtype).eps) * 1e3

        if fmm_order is not None and fmm_level_to_order is not None:
            raise TypeError(
                "may not specify both fmm_order and fmm_level_to_order")

        if _box_extent_norm is None:
            _box_extent_norm = "l2"

        if _from_sep_smaller_crit is None:
            # This seems to win no matter what the box extent norm is
            # https://gitlab.tiker.net/papers/2017-qbx-fmm-3d/issues/10
            _from_sep_smaller_crit = "precise_linf"

        if fmm_level_to_order is None:
            if fmm_order is False:
                fmm_level_to_order = False
            else:

                def fmm_level_to_order(kernel, kernel_args, tree, level):  # noqa pylint:disable=function-redefined
                    return fmm_order

        if _max_leaf_refine_weight is None:
            if density_discr.ambient_dim == 2:
                # FIXME: This should be verified now that l^2 is the default.
                _max_leaf_refine_weight = 64
            elif density_discr.ambient_dim == 3:
                # For static_linf/linf: https://gitlab.tiker.net/papers/2017-qbx-fmm-3d/issues/8#note_25009  # noqa
                # For static_l2/l2: https://gitlab.tiker.net/papers/2017-qbx-fmm-3d/issues/12  # noqa
                _max_leaf_refine_weight = 512
            else:
                # Just guessing...
                _max_leaf_refine_weight = 64

        if _from_sep_smaller_min_nsources_cumul is None:
            # See here for the comment thread that led to these defaults:
            # https://gitlab.tiker.net/inducer/boxtree/merge_requests/28#note_18661
            if density_discr.dim == 1:
                _from_sep_smaller_min_nsources_cumul = 15
            else:
                _from_sep_smaller_min_nsources_cumul = 30

        # }}}

        LayerPotentialSourceBase.__init__(self, density_discr)

        self.fine_order = fine_order
        self.qbx_order = qbx_order
        self.fmm_level_to_order = fmm_level_to_order

        assert target_association_tolerance is not None

        self.target_association_tolerance = target_association_tolerance
        self.fmm_backend = fmm_backend

        if expansion_factory is None:
            from sumpy.expansion import DefaultExpansionFactory
            expansion_factory = DefaultExpansionFactory()
        self.expansion_factory = expansion_factory

        self.debug = debug
        self._disable_refinement = _disable_refinement
        self._expansions_in_tree_have_extent = \
                _expansions_in_tree_have_extent
        self._expansion_stick_out_factor = _expansion_stick_out_factor
        self._well_sep_is_n_away = _well_sep_is_n_away
        self._max_leaf_refine_weight = _max_leaf_refine_weight
        self._box_extent_norm = _box_extent_norm
        self._from_sep_smaller_crit = _from_sep_smaller_crit
        self._from_sep_smaller_min_nsources_cumul = \
                _from_sep_smaller_min_nsources_cumul
        self._tree_kind = _tree_kind
        self._use_target_specific_qbx = _use_target_specific_qbx
        self.geometry_data_inspector = geometry_data_inspector

        if cost_model is None:
            from pytential.qbx.cost import QBXCostModel
            cost_model = QBXCostModel()

        self.cost_model = cost_model
示例#6
0
def test_cost_model_correctness(ctx_factory, dim, off_surface,
        use_target_specific_qbx):
    """Check that computed cost matches that of a constant-one FMM."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    cost_model = QBXCostModel(
        translation_cost_model_factory=OpCountingTranslationCostModel)

    lpot_source = get_lpot_source(actx, dim).copy(
            cost_model=cost_model,
            _use_target_specific_qbx=use_target_specific_qbx)

    # Construct targets.
    if off_surface:
        from pytential.target import PointsTarget
        from boxtree.tools import make_uniform_particle_array
        ntargets = 10 ** 3
        targets = PointsTarget(
                make_uniform_particle_array(queue, ntargets, dim, np.float))
        target_discrs_and_qbx_sides = ((targets, 0),)
        qbx_forced_limit = None
    else:
        targets = lpot_source.density_discr
        target_discrs_and_qbx_sides = ((targets, 1),)
        qbx_forced_limit = 1
    places = GeometryCollection((lpot_source, targets))

    source_dd = places.auto_source
    density_discr = places.get_discretization(source_dd.geometry)

    # Construct bound op, run cost model.
    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(lpot_source.ambient_dim)
    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=qbx_forced_limit)

    op_S = bind(places, sym_op_S)
    sigma = get_density(actx, density_discr)

    from pytools import one
    modeled_time, _ = op_S.cost_per_stage("constant_one", sigma=sigma)
    modeled_time = one(modeled_time.values())

    # Run FMM with ConstantOneWrangler. This can't be done with pytential's
    # high-level interface, so call the FMM driver directly.
    from pytential.qbx.fmm import drive_fmm
    geo_data = lpot_source.qbx_fmm_geometry_data(
            places, source_dd.geometry,
            target_discrs_and_qbx_sides=target_discrs_and_qbx_sides)

    wrangler = ConstantOneQBXExpansionWrangler(
            queue, geo_data, use_target_specific_qbx)

    quad_stage2_density_discr = places.get_discretization(
            source_dd.geometry, sym.QBX_SOURCE_QUAD_STAGE2)
    ndofs = quad_stage2_density_discr.ndofs
    src_weights = np.ones(ndofs)

    timing_data = {}
    potential = drive_fmm(wrangler, (src_weights,), timing_data,
            traversal=wrangler.trav)[0][geo_data.ncenters:]

    # Check constant one wrangler for correctness.
    assert (potential == ndofs).all()

    # Check that the cost model matches the timing data returned by the
    # constant one wrangler.
    mismatches = []
    for stage in timing_data:
        if stage not in modeled_time:
            assert timing_data[stage]["ops_elapsed"] == 0
        else:
            if timing_data[stage]["ops_elapsed"] != modeled_time[stage]:
                mismatches.append(
                    (stage, timing_data[stage]["ops_elapsed"], modeled_time[stage]))

    assert not mismatches, "\n".join(str(s) for s in mismatches)

    # {{{ Test per-box cost

    total_cost = 0.0
    for stage in timing_data:
        total_cost += timing_data[stage]["ops_elapsed"]

    per_box_cost, _ = op_S.cost_per_box("constant_one", sigma=sigma)
    print(per_box_cost)
    per_box_cost = one(per_box_cost.values())

    total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost)
    assert total_cost == (
            total_aggregate_cost
            + modeled_time["coarsen_multipoles"]
            + modeled_time["refine_locals"]
    )
示例#7
0
def test_compare_cl_and_py_cost_model(ctx_factory):
    nelements = 3600
    target_order = 16
    fmm_order = 5
    qbx_order = fmm_order

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    actx = PyOpenCLArrayContext(queue)

    # {{{ Construct geometry

    from meshmode.mesh.generation import make_curve_mesh, starfish
    mesh = make_curve_mesh(starfish, np.linspace(0, 1, nelements), target_order)

    from meshmode.discretization import Discretization
    from meshmode.discretization.poly_element import \
        InterpolatoryQuadratureSimplexGroupFactory
    pre_density_discr = Discretization(
        actx, mesh,
        InterpolatoryQuadratureSimplexGroupFactory(target_order)
    )

    qbx = QBXLayerPotentialSource(
        pre_density_discr, 4 * target_order,
        qbx_order,
        fmm_order=fmm_order
    )
    places = GeometryCollection(qbx)

    from pytential.qbx.refinement import refine_geometry_collection
    places = refine_geometry_collection(places)

    target_discrs_and_qbx_sides = tuple([(qbx.density_discr, 0)])
    geo_data_dev = qbx.qbx_fmm_geometry_data(
        places, places.auto_source.geometry, target_discrs_and_qbx_sides
    )

    from pytential.qbx.utils import ToHostTransferredGeoDataWrapper
    geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data_dev)

    # }}}

    # {{{ Construct cost models

    cl_cost_model = QBXCostModel()
    python_cost_model = _PythonQBXCostModel()

    tree = geo_data.tree()
    xlat_cost = make_pde_aware_translation_cost_model(
        tree.targets.shape[0], tree.nlevels
    )

    constant_one_params = QBXCostModel.get_unit_calibration_params()
    constant_one_params["p_qbx"] = 5
    for ilevel in range(tree.nlevels):
        constant_one_params["p_fmm_lev%d" % ilevel] = 10

    cl_cost_factors = cl_cost_model.qbx_cost_factors_for_kernels_from_model(
        queue, tree.nlevels, xlat_cost, constant_one_params
    )

    python_cost_factors = python_cost_model.qbx_cost_factors_for_kernels_from_model(
        None, tree.nlevels, xlat_cost, constant_one_params
    )

    # }}}

    # {{{ Test process_form_qbxl

    cl_ndirect_sources_per_target_box = (
        cl_cost_model.get_ndirect_sources_per_target_box(
            queue, geo_data_dev.traversal()
        )
    )

    queue.finish()
    start_time = time.time()

    cl_p2qbxl = cl_cost_model.process_form_qbxl(
        queue, geo_data_dev, cl_cost_factors["p2qbxl_cost"],
        cl_ndirect_sources_per_target_box
    )

    queue.finish()
    logger.info("OpenCL time for process_form_qbxl: {}".format(
        str(time.time() - start_time)
    ))

    python_ndirect_sources_per_target_box = (
        python_cost_model.get_ndirect_sources_per_target_box(
            queue, geo_data.traversal()
        )
    )

    start_time = time.time()

    python_p2qbxl = python_cost_model.process_form_qbxl(
        queue, geo_data, python_cost_factors["p2qbxl_cost"],
        python_ndirect_sources_per_target_box
    )

    logger.info("Python time for process_form_qbxl: {}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_p2qbxl.get(), python_p2qbxl)

    # }}}

    # {{{ Test process_m2qbxl

    queue.finish()
    start_time = time.time()

    cl_m2qbxl = cl_cost_model.process_m2qbxl(
        queue, geo_data_dev, cl_cost_factors["m2qbxl_cost"]
    )

    queue.finish()
    logger.info("OpenCL time for process_m2qbxl: {}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()

    python_m2qbxl = python_cost_model.process_m2qbxl(
        queue, geo_data, python_cost_factors["m2qbxl_cost"]
    )

    logger.info("Python time for process_m2qbxl: {}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_m2qbxl.get(), python_m2qbxl)

    # }}}

    # {{{ Test process_l2qbxl

    queue.finish()
    start_time = time.time()

    cl_l2qbxl = cl_cost_model.process_l2qbxl(
        queue, geo_data_dev, cl_cost_factors["l2qbxl_cost"]
    )

    queue.finish()
    logger.info("OpenCL time for process_l2qbxl: {}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()

    python_l2qbxl = python_cost_model.process_l2qbxl(
        queue, geo_data, python_cost_factors["l2qbxl_cost"]
    )

    logger.info("Python time for process_l2qbxl: {}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_l2qbxl.get(), python_l2qbxl)

    # }}}

    # {{{ Test process_eval_qbxl

    queue.finish()
    start_time = time.time()

    cl_eval_qbxl = cl_cost_model.process_eval_qbxl(
        queue, geo_data_dev, cl_cost_factors["qbxl2p_cost"]
    )

    queue.finish()
    logger.info("OpenCL time for process_eval_qbxl: {}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()

    python_eval_qbxl = python_cost_model.process_eval_qbxl(
        queue, geo_data, python_cost_factors["qbxl2p_cost"]
    )

    logger.info("Python time for process_eval_qbxl: {}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_eval_qbxl.get(), python_eval_qbxl)

    # }}}

    # {{{ Test eval_target_specific_qbxl

    queue.finish()
    start_time = time.time()

    cl_eval_target_specific_qbxl = cl_cost_model.process_eval_target_specific_qbxl(
        queue, geo_data_dev, cl_cost_factors["p2p_tsqbx_cost"],
        cl_ndirect_sources_per_target_box
    )

    queue.finish()
    logger.info("OpenCL time for eval_target_specific_qbxl: {}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()

    python_eval_target_specific_qbxl = \
        python_cost_model.process_eval_target_specific_qbxl(
            queue, geo_data, python_cost_factors["p2p_tsqbx_cost"],
            python_ndirect_sources_per_target_box
        )

    logger.info("Python time for eval_target_specific_qbxl: {}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(
        cl_eval_target_specific_qbxl.get(), python_eval_target_specific_qbxl
    )