def test_cost_model_order_varying_by_level(ctx_factory):
    """For FMM order varying by level, this checks to ensure that the costs
    are different. The varying-level case should have larger cost, since
    ``level_to_order_varying`` returns orders >= 1 at every level while the
    constant case uses order 1 throughout.
    """
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    # {{{ constant level to order

    def level_to_order_constant(kernel, kernel_args, tree, level):
        return 1

    lpot_source = get_lpot_source(actx, 2).copy(
            cost_model=QBXCostModel(),
            fmm_level_to_order=level_to_order_constant)
    places = GeometryCollection(lpot_source)

    density_discr = places.get_discretization(places.auto_source.geometry)
    sigma_sym = sym.var("sigma")

    k_sym = LaplaceKernel(2)
    sym_op = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)

    sigma = get_density(actx, density_discr)

    cost_constant, metadata = bind(places, sym_op).cost_per_stage(
            "constant_one", sigma=sigma)

    cost_constant = one(cost_constant.values())
    # metadata carries the tree's "nlevels", needed below to construct a
    # level-dependent order function
    metadata = one(metadata.values())

    # }}}

    # {{{ varying level to order

    def level_to_order_varying(kernel, kernel_args, tree, level):
        # higher order near the root, decreasing toward the leaves
        return metadata["nlevels"] - level

    lpot_source = get_lpot_source(actx, 2).copy(
            cost_model=QBXCostModel(),
            fmm_level_to_order=level_to_order_varying)
    places = GeometryCollection(lpot_source)

    density_discr = places.get_discretization(places.auto_source.geometry)

    sigma = get_density(actx, density_discr)

    # NOTE: bind the GeometryCollection (not the raw lpot_source) for
    # consistency with the constant-order case above
    cost_varying, _ = bind(places, sym_op).cost_per_stage(
            "constant_one", sigma=sigma)

    cost_varying = one(cost_varying.values())

    # }}}

    assert sum(cost_varying.values()) > sum(cost_constant.values())
def calibrate_cost_model(ctx):
    """Estimate kernel-specific calibration parameters for the QBX cost model.

    Runs each training geometry ``RUNS`` times, pairing the modeled
    (symbolic) cost of every run with the measured wall-clock timing data,
    then fits calibration parameters from those pairs.

    :arg ctx: a :class:`pyopencl.Context` used for evaluation.
    :returns: calibration parameters as produced by
        ``QBXCostModel.estimate_kernel_specific_calibration_params``.
    """
    queue = cl.CommandQueue(ctx)
    actx = PyOpenCLArrayContext(queue, force_device_scalars=True)

    cost_model = QBXCostModel()

    # Parallel lists: model_results[i] corresponds to timing_results[i].
    model_results = []
    timing_results = []

    for lpot_source in training_geometries(actx):
        lpot_source = lpot_source.copy(cost_model=cost_model)

        from pytential import GeometryCollection
        places = GeometryCollection(lpot_source)
        density_discr = places.get_discretization(places.auto_source.geometry)

        bound_op = get_bound_op(places)
        sigma = get_test_density(actx, density_discr)

        # Modeled cost with unit ("constant_one") calibration parameters.
        modeled_cost, _ = bound_op.cost_per_stage("constant_one", sigma=sigma)

        # Warm-up run.
        bound_op.eval({"sigma": sigma}, array_context=actx)

        for _ in range(RUNS):
            timing_data = {}
            bound_op.eval({"sigma": sigma},
                    array_context=actx,
                    timing_data=timing_data)

            # One (model, timing) sample per run; the modeled cost is the
            # same for every run of the same geometry.
            model_results.append(modeled_cost)
            timing_results.append(timing_data)

    # Fit against the process-elapsed wall time of each FMM stage.
    calibration_params = cost_model.estimate_kernel_specific_calibration_params(
            model_results, timing_results, time_field_name="process_elapsed")

    return calibration_params
def test_cost_model(ctx, calibration_params):
    """Compare calibrated model predictions against measured timings.

    For each test geometry, evaluates the bound operator ``RUNS`` times,
    averages the measured per-stage process-elapsed time, and prints a
    table of actual vs. predicted seconds per FMM stage.

    :arg ctx: a :class:`pyopencl.Context`.
    :arg calibration_params: parameters previously obtained from
        ``calibrate_cost_model``.
    """
    queue = cl.CommandQueue(ctx)
    actx = PyOpenCLArrayContext(queue, force_device_scalars=True)

    cost_model = QBXCostModel()

    for lpot_source in test_geometries(actx):
        lpot_source = lpot_source.copy(cost_model=cost_model)

        from pytential import GeometryCollection
        places = GeometryCollection(lpot_source)
        density_discr = places.get_discretization(places.auto_source.geometry)

        bound_op = get_bound_op(places)
        sigma = get_test_density(actx, density_discr)

        # Predicted per-stage cost using the calibrated parameters.
        cost_S, _ = bound_op.cost_per_stage(calibration_params, sigma=sigma)
        model_result = one(cost_S.values())

        # Warm-up run.
        bound_op.eval({"sigma": sigma}, array_context=actx)

        temp_timing_results = []
        for _ in range(RUNS):
            timing_data = {}
            bound_op.eval({"sigma": sigma},
                    array_context=actx, timing_data=timing_data)
            temp_timing_results.append(one(timing_data.values()))

        # Average the measured process-elapsed time over all runs,
        # stage by stage.
        timing_result = {}
        for param in model_result:
            timing_result[param] = (sum(
                    temp_timing_result[param]["process_elapsed"]
                    for temp_timing_result in temp_timing_results)) / RUNS

        from pytools import Table
        table = Table()
        table.add_row(["stage", "actual (s)", "predicted (s)"])
        for stage in model_result:
            row = [
                    stage,
                    f"{timing_result[stage]:.2f}",
                    f"{model_result[stage]:.2f}",
                    ]
            table.add_row(row)

        print(table)
def test_cost_model(ctx_factory, dim, use_target_specific_qbx, per_box):
    """Test that cost model gathering can execute successfully."""
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    # Build a layer-potential source with cost modeling enabled.
    lpot_source = get_lpot_source(actx, dim).copy(
            _use_target_specific_qbx=use_target_specific_qbx,
            cost_model=QBXCostModel())
    places = GeometryCollection(lpot_source)

    density_discr = places.get_discretization(places.auto_source.geometry)
    sigma = get_density(actx, density_discr)

    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(lpot_source.ambient_dim)

    def gather_cost(bound_op):
        # Gather either per-box or per-stage cost, depending on the flag.
        if per_box:
            result, _ = bound_op.cost_per_box("constant_one", sigma=sigma)
        else:
            result, _ = bound_op.cost_per_stage("constant_one", sigma=sigma)
        return result

    # A single-layer potential contributes exactly one cost entry.
    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
    cost_S = gather_cost(bind(places, sym_op_S))
    assert len(cost_S) == 1

    # S + D produces two separately-modeled evaluations.
    sym_op_S_plus_D = (
            sym.S(k_sym, sigma_sym, qbx_forced_limit=+1)
            + sym.D(k_sym, sigma_sym, qbx_forced_limit="avg"))
    cost_S_plus_D = gather_cost(bind(places, sym_op_S_plus_D))
    assert len(cost_S_plus_D) == 2
def __init__(
        self,
        density_discr,
        fine_order,
        qbx_order=None,
        fmm_order=None,
        fmm_level_to_order=None,
        expansion_factory=None,
        target_association_tolerance=_not_provided,

        # begin experimental arguments
        # FIXME default debug=False once everything has matured
        debug=True,
        _disable_refinement=False,
        _expansions_in_tree_have_extent=True,
        _expansion_stick_out_factor=0.5,
        _well_sep_is_n_away=2,
        _max_leaf_refine_weight=None,
        _box_extent_norm=None,
        _from_sep_smaller_crit=None,
        _from_sep_smaller_min_nsources_cumul=None,
        _tree_kind="adaptive",
        _use_target_specific_qbx=None,
        geometry_data_inspector=None,
        cost_model=None,
        fmm_backend="sumpy",
        target_stick_out_factor=_not_provided):
    """
    :arg fine_order: The total degree to which the (upsampled)
        underlying quadrature is exact.
    :arg fmm_order: `False` for direct calculation. May not be given if
        *fmm_level_to_order* is given.
    :arg fmm_level_to_order: A function that takes arguments of
        *(kernel, kernel_args, tree, level)* and returns the expansion
        order to be used on a given *level* of *tree* with *kernel*, where
        *kernel* is the :class:`sumpy.kernel.Kernel` being evaluated, and
        *kernel_args* is a set of *(key, value)* tuples with evaluated
        kernel arguments. May not be given if *fmm_order* is given.

    Experimental arguments without a promise of forward compatibility:

    :arg _use_target_specific_qbx: Whether to use target-specific
        acceleration by default if possible. *None* means "use if possible".
    :arg cost_model: Either *None* or an object implementing the
        :class:`~pytential.qbx.cost.AbstractQBXCostModel` interface, used for
        gathering modeled costs if provided (experimental)
    """

    # {{{ argument processing

    if fine_order is None:
        raise ValueError("fine_order must be provided.")

    if qbx_order is None:
        raise ValueError("qbx_order must be provided.")

    # Accept the deprecated spelling "target_stick_out_factor" as an alias
    # for target_association_tolerance (but not both at once).
    if target_stick_out_factor is not _not_provided:
        from warnings import warn
        warn("target_stick_out_factor has been renamed to "
                "target_association_tolerance. "
                "Using target_stick_out_factor is deprecated "
                "and will stop working in 2018.",
                DeprecationWarning, stacklevel=2)

        if target_association_tolerance is not _not_provided:
            raise TypeError(
                    "May not pass both target_association_tolerance and "
                    "target_stick_out_factor.")

        target_association_tolerance = target_stick_out_factor

    del target_stick_out_factor

    # Default tolerance: a small multiple of machine epsilon for the
    # discretization's real dtype.
    if target_association_tolerance is _not_provided:
        target_association_tolerance = float(
                np.finfo(density_discr.real_dtype).eps) * 1e3

    if fmm_order is not None and fmm_level_to_order is not None:
        raise TypeError(
                "may not specify both fmm_order and fmm_level_to_order")

    if _box_extent_norm is None:
        _box_extent_norm = "l2"

    if _from_sep_smaller_crit is None:
        # This seems to win no matter what the box extent norm is
        # https://gitlab.tiker.net/papers/2017-qbx-fmm-3d/issues/10
        _from_sep_smaller_crit = "precise_linf"

    # Normalize fmm_order into a level-to-order callable (or False for
    # direct evaluation).
    if fmm_level_to_order is None:
        if fmm_order is False:
            fmm_level_to_order = False
        else:
            def fmm_level_to_order(kernel, kernel_args, tree, level):  # noqa pylint:disable=function-redefined
                return fmm_order

    # Dimension-dependent defaults for the tree's leaf refinement weight.
    if _max_leaf_refine_weight is None:
        if density_discr.ambient_dim == 2:
            # FIXME: This should be verified now that l^2 is the default.
            _max_leaf_refine_weight = 64
        elif density_discr.ambient_dim == 3:
            # For static_linf/linf: https://gitlab.tiker.net/papers/2017-qbx-fmm-3d/issues/8#note_25009  # noqa
            # For static_l2/l2: https://gitlab.tiker.net/papers/2017-qbx-fmm-3d/issues/12  # noqa
            _max_leaf_refine_weight = 512
        else:
            # Just guessing...
            _max_leaf_refine_weight = 64

    if _from_sep_smaller_min_nsources_cumul is None:
        # See here for the comment thread that led to these defaults:
        # https://gitlab.tiker.net/inducer/boxtree/merge_requests/28#note_18661
        if density_discr.dim == 1:
            _from_sep_smaller_min_nsources_cumul = 15
        else:
            _from_sep_smaller_min_nsources_cumul = 30

    # }}}

    LayerPotentialSourceBase.__init__(self, density_discr)

    self.fine_order = fine_order
    self.qbx_order = qbx_order
    self.fmm_level_to_order = fmm_level_to_order

    assert target_association_tolerance is not None

    self.target_association_tolerance = target_association_tolerance
    self.fmm_backend = fmm_backend

    if expansion_factory is None:
        from sumpy.expansion import DefaultExpansionFactory
        expansion_factory = DefaultExpansionFactory()
    self.expansion_factory = expansion_factory

    self.debug = debug
    self._disable_refinement = _disable_refinement
    self._expansions_in_tree_have_extent = \
            _expansions_in_tree_have_extent
    self._expansion_stick_out_factor = _expansion_stick_out_factor
    self._well_sep_is_n_away = _well_sep_is_n_away
    self._max_leaf_refine_weight = _max_leaf_refine_weight
    self._box_extent_norm = _box_extent_norm
    self._from_sep_smaller_crit = _from_sep_smaller_crit
    self._from_sep_smaller_min_nsources_cumul = \
            _from_sep_smaller_min_nsources_cumul
    self._tree_kind = _tree_kind
    self._use_target_specific_qbx = _use_target_specific_qbx
    self.geometry_data_inspector = geometry_data_inspector

    # Default to a QBXCostModel if no cost model was supplied.
    if cost_model is None:
        from pytential.qbx.cost import QBXCostModel
        cost_model = QBXCostModel()
    self.cost_model = cost_model
def test_cost_model_correctness(ctx_factory, dim, off_surface,
        use_target_specific_qbx):
    """Check that computed cost matches that of a constant-one FMM.

    The op-counting cost model's per-stage totals are compared against the
    operation counts reported by ``ConstantOneQBXExpansionWrangler``, and
    the per-box cost aggregate is checked for consistency with the
    per-stage totals.
    """
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)
    actx = PyOpenCLArrayContext(queue)

    cost_model = QBXCostModel(
        translation_cost_model_factory=OpCountingTranslationCostModel)

    lpot_source = get_lpot_source(actx, dim).copy(
            cost_model=cost_model,
            _use_target_specific_qbx=use_target_specific_qbx)

    # Construct targets.
    if off_surface:
        from pytential.target import PointsTarget
        from boxtree.tools import make_uniform_particle_array
        ntargets = 10 ** 3
        # NOTE: np.float (a deprecated alias for the builtin float) was
        # removed in NumPy 1.24; use np.float64 explicitly.
        targets = PointsTarget(
                make_uniform_particle_array(queue, ntargets, dim, np.float64))
        target_discrs_and_qbx_sides = ((targets, 0),)
        qbx_forced_limit = None
    else:
        targets = lpot_source.density_discr
        target_discrs_and_qbx_sides = ((targets, 1),)
        qbx_forced_limit = 1

    places = GeometryCollection((lpot_source, targets))

    source_dd = places.auto_source
    density_discr = places.get_discretization(source_dd.geometry)

    # Construct bound op, run cost model.
    sigma_sym = sym.var("sigma")
    k_sym = LaplaceKernel(lpot_source.ambient_dim)
    sym_op_S = sym.S(k_sym, sigma_sym, qbx_forced_limit=qbx_forced_limit)

    op_S = bind(places, sym_op_S)
    sigma = get_density(actx, density_discr)

    from pytools import one
    modeled_time, _ = op_S.cost_per_stage("constant_one", sigma=sigma)
    modeled_time = one(modeled_time.values())

    # Run FMM with ConstantOneWrangler. This can't be done with pytential's
    # high-level interface, so call the FMM driver directly.
    from pytential.qbx.fmm import drive_fmm
    geo_data = lpot_source.qbx_fmm_geometry_data(
            places, source_dd.geometry,
            target_discrs_and_qbx_sides=target_discrs_and_qbx_sides)

    wrangler = ConstantOneQBXExpansionWrangler(
            queue, geo_data, use_target_specific_qbx)

    quad_stage2_density_discr = places.get_discretization(
            source_dd.geometry, sym.QBX_SOURCE_QUAD_STAGE2)
    ndofs = quad_stage2_density_discr.ndofs
    src_weights = np.ones(ndofs)

    timing_data = {}
    potential = drive_fmm(wrangler, (src_weights,), timing_data,
            traversal=wrangler.trav)[0][geo_data.ncenters:]

    # Check constant one wrangler for correctness: with unit sources, the
    # potential at each target is the total number of sources.
    assert (potential == ndofs).all()

    # Check that the cost model matches the timing data returned by the
    # constant one wrangler.
    mismatches = []
    for stage in timing_data:
        if stage not in modeled_time:
            # Stages absent from the model must have done no work.
            assert timing_data[stage]["ops_elapsed"] == 0
        else:
            if timing_data[stage]["ops_elapsed"] != modeled_time[stage]:
                mismatches.append(
                        (stage, timing_data[stage]["ops_elapsed"],
                            modeled_time[stage]))

    assert not mismatches, "\n".join(str(s) for s in mismatches)

    # {{{ Test per-box cost

    total_cost = 0.0
    for stage in timing_data:
        total_cost += timing_data[stage]["ops_elapsed"]

    per_box_cost, _ = op_S.cost_per_box("constant_one", sigma=sigma)
    print(per_box_cost)
    per_box_cost = one(per_box_cost.values())

    # coarsen_multipoles and refine_locals are not attributed to individual
    # boxes, so they are added separately to the per-box aggregate.
    total_aggregate_cost = cost_model.aggregate_over_boxes(per_box_cost)
    assert total_cost == (
            total_aggregate_cost
            + modeled_time["coarsen_multipoles"]
            + modeled_time["refine_locals"]
    )
def test_compare_cl_and_py_cost_model(ctx_factory):
    """Check that the OpenCL and pure-Python QBX cost models agree.

    Builds a starfish geometry, evaluates each per-stage cost kernel
    (form/m2/l2/eval/target-specific QBX local expansions) with both
    ``QBXCostModel`` (device) and ``_PythonQBXCostModel`` (host), logs the
    wall-clock time of each, and asserts the results are identical.
    """
    nelements = 3600
    target_order = 16
    fmm_order = 5
    qbx_order = fmm_order

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    actx = PyOpenCLArrayContext(queue)

    # {{{ Construct geometry

    from meshmode.mesh.generation import make_curve_mesh, starfish
    mesh = make_curve_mesh(starfish, np.linspace(0, 1, nelements), target_order)

    from meshmode.discretization import Discretization
    from meshmode.discretization.poly_element import \
        InterpolatoryQuadratureSimplexGroupFactory
    pre_density_discr = Discretization(
        actx, mesh,
        InterpolatoryQuadratureSimplexGroupFactory(target_order)
    )

    qbx = QBXLayerPotentialSource(
        pre_density_discr, 4 * target_order,
        qbx_order,
        fmm_order=fmm_order
    )
    places = GeometryCollection(qbx)

    from pytential.qbx.refinement import refine_geometry_collection
    places = refine_geometry_collection(places)

    target_discrs_and_qbx_sides = tuple([(qbx.density_discr, 0)])
    geo_data_dev = qbx.qbx_fmm_geometry_data(
        places, places.auto_source.geometry, target_discrs_and_qbx_sides
    )

    # Host-side mirror of the device geometry data, used by the Python
    # cost model.
    from pytential.qbx.utils import ToHostTransferredGeoDataWrapper
    geo_data = ToHostTransferredGeoDataWrapper(queue, geo_data_dev)

    # }}}

    # {{{ Construct cost models

    cl_cost_model = QBXCostModel()
    python_cost_model = _PythonQBXCostModel()

    tree = geo_data.tree()
    xlat_cost = make_pde_aware_translation_cost_model(
        tree.targets.shape[0], tree.nlevels
    )

    # Unit calibration with fixed expansion orders so both models evaluate
    # the same cost expressions.
    constant_one_params = QBXCostModel.get_unit_calibration_params()
    constant_one_params["p_qbx"] = 5
    for ilevel in range(tree.nlevels):
        constant_one_params["p_fmm_lev%d" % ilevel] = 10

    cl_cost_factors = cl_cost_model.qbx_cost_factors_for_kernels_from_model(
        queue, tree.nlevels, xlat_cost, constant_one_params
    )

    python_cost_factors = python_cost_model.qbx_cost_factors_for_kernels_from_model(
        None, tree.nlevels, xlat_cost, constant_one_params
    )

    # }}}

    # {{{ Test process_form_qbxl

    cl_ndirect_sources_per_target_box = (
        cl_cost_model.get_ndirect_sources_per_target_box(
            queue, geo_data_dev.traversal()
        )
    )

    # queue.finish() before/after timing so only the measured kernel's
    # execution is included.
    queue.finish()
    start_time = time.time()

    cl_p2qbxl = cl_cost_model.process_form_qbxl(
        queue, geo_data_dev, cl_cost_factors["p2qbxl_cost"],
        cl_ndirect_sources_per_target_box
    )

    queue.finish()
    logger.info("OpenCL time for process_form_qbxl: {}".format(
        str(time.time() - start_time)
    ))

    python_ndirect_sources_per_target_box = (
        python_cost_model.get_ndirect_sources_per_target_box(
            queue, geo_data.traversal()
        )
    )

    start_time = time.time()

    python_p2qbxl = python_cost_model.process_form_qbxl(
        queue, geo_data, python_cost_factors["p2qbxl_cost"],
        python_ndirect_sources_per_target_box
    )

    logger.info("Python time for process_form_qbxl: {}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_p2qbxl.get(), python_p2qbxl)

    # }}}

    # {{{ Test process_m2qbxl

    queue.finish()
    start_time = time.time()

    cl_m2qbxl = cl_cost_model.process_m2qbxl(
        queue, geo_data_dev, cl_cost_factors["m2qbxl_cost"]
    )

    queue.finish()
    logger.info("OpenCL time for process_m2qbxl: {}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()

    python_m2qbxl = python_cost_model.process_m2qbxl(
        queue, geo_data, python_cost_factors["m2qbxl_cost"]
    )

    logger.info("Python time for process_m2qbxl: {}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_m2qbxl.get(), python_m2qbxl)

    # }}}

    # {{{ Test process_l2qbxl

    queue.finish()
    start_time = time.time()

    cl_l2qbxl = cl_cost_model.process_l2qbxl(
        queue, geo_data_dev, cl_cost_factors["l2qbxl_cost"]
    )

    queue.finish()
    logger.info("OpenCL time for process_l2qbxl: {}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()

    python_l2qbxl = python_cost_model.process_l2qbxl(
        queue, geo_data, python_cost_factors["l2qbxl_cost"]
    )

    logger.info("Python time for process_l2qbxl: {}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_l2qbxl.get(), python_l2qbxl)

    # }}}

    # {{{ Test process_eval_qbxl

    queue.finish()
    start_time = time.time()

    cl_eval_qbxl = cl_cost_model.process_eval_qbxl(
        queue, geo_data_dev, cl_cost_factors["qbxl2p_cost"]
    )

    queue.finish()
    logger.info("OpenCL time for process_eval_qbxl: {}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()

    python_eval_qbxl = python_cost_model.process_eval_qbxl(
        queue, geo_data, python_cost_factors["qbxl2p_cost"]
    )

    logger.info("Python time for process_eval_qbxl: {}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(cl_eval_qbxl.get(), python_eval_qbxl)

    # }}}

    # {{{ Test eval_target_specific_qbxl

    queue.finish()
    start_time = time.time()

    cl_eval_target_specific_qbxl = cl_cost_model.process_eval_target_specific_qbxl(
        queue, geo_data_dev, cl_cost_factors["p2p_tsqbx_cost"],
        cl_ndirect_sources_per_target_box
    )

    queue.finish()
    logger.info("OpenCL time for eval_target_specific_qbxl: {}".format(
        str(time.time() - start_time)
    ))

    start_time = time.time()

    python_eval_target_specific_qbxl = \
        python_cost_model.process_eval_target_specific_qbxl(
            queue, geo_data, python_cost_factors["p2p_tsqbx_cost"],
            python_ndirect_sources_per_target_box
        )

    logger.info("Python time for eval_target_specific_qbxl: {}".format(
        str(time.time() - start_time)
    ))

    assert np.array_equal(
        cl_eval_target_specific_qbxl.get(), python_eval_target_specific_qbxl
    )