def _check_and_fix_temp_var_type(temp_var_type, stacklevel=2):
    """Normalize *temp_var_type*, warning about deprecated spellings.

    Accepts the legacy values ``None``, :data:`loopy.auto`, and bare dtypes,
    emitting a :class:`DeprecationWarning` for each, and returns the
    corresponding :class:`loopy.Optional`. Values that are already
    :class:`loopy.Optional` pass through unchanged.

    :arg stacklevel: forwarded (plus one for this frame) to :func:`warnings.warn`
        so the warning points at the caller's caller.
    """
    import loopy as lp

    caller_level = 1 + stacklevel

    if temp_var_type is None:
        warn("temp_var_type should be Optional() if no temporary, not None. "
             "This usage will be disallowed soon.",
             DeprecationWarning, stacklevel=caller_level)
        return lp.Optional()

    if temp_var_type is lp.auto:
        warn("temp_var_type should be Optional(None) if "
             "unspecified, not auto. This usage will be disallowed soon.",
             DeprecationWarning, stacklevel=caller_level)
        return lp.Optional(None)

    if isinstance(temp_var_type, lp.Optional):
        # Already in canonical form.
        return temp_var_type

    warn("temp_var_type should be an instance of Optional. "
         "Other values for temp_var_type will be disallowed soon.",
         DeprecationWarning, stacklevel=caller_level)
    return lp.Optional(temp_var_type)
def to_loopy_insns(assignments, vector_names=None, pymbolic_expr_maps=(),
        complex_dtype=None, retain_names=None):
    """Convert ``(name, sympy_expr)`` pairs into :class:`loopy.Assignment`s.

    Runs the sympy expressions through the sumpy rewrite pipeline (Bessel
    substitution, vector-component rewriting, power/fraction/integer/complex
    cleanups) before emitting one assignment per pair.

    :arg assignments: iterable of ``(name, sympy_expression)`` pairs.
    :arg vector_names: names treated as vectors by the component rewriter.
    :arg pymbolic_expr_maps: extra callables applied to each converted
        expression, in order.
    :arg complex_dtype: accepted for interface compatibility; not used in
        this body — presumably consumed elsewhere or vestigial (TODO confirm).
    :arg retain_names: assignment names protected from trivial-assignment
        elimination.
    :returns: list of :class:`loopy.Assignment` instances.
    """
    # Fix: avoid mutable default arguments (set()/[]); normalize here instead.
    if vector_names is None:
        vector_names = set()
    if retain_names is None:
        retain_names = set()

    logger.info("loopy instruction generation: start")
    assignments = list(assignments)

    # convert from sympy
    sympy_conv = SympyToPymbolicMapper()
    assignments = [(name, sympy_conv(expr)) for name, expr in assignments]
    assignments = kill_trivial_assignments(assignments, retain_names)

    bdr = BesselDerivativeReplacer()
    assignments = [(name, bdr(expr)) for name, expr in assignments]

    # Gather top Bessel orders so the substitutor can generate recurrences.
    btog = BesselTopOrderGatherer()
    for name, expr in assignments:
        btog(expr)

    # do the rest of the conversion
    bessel_sub = BesselSubstitutor(BesselGetter(btog.bessel_j_arg_to_top_order))
    vcr = VectorComponentRewriter(vector_names)
    pwr = PowerRewriter()
    ssg = SumSignGrouper()
    fck = FractionKiller()
    bik = BigIntegerKiller()
    cmr = ComplexRewriter()

    def convert_expr(name, expr):
        # Apply the rewrite pipeline in its fixed order, then user maps.
        logger.debug("generate expression for: %s" % name)
        expr = bdr(expr)
        expr = bessel_sub(expr)
        expr = vcr(expr)
        expr = pwr(expr)
        expr = fck(expr)
        expr = ssg(expr)
        expr = bik(expr)
        expr = cmr(expr)
        for m in pymbolic_expr_maps:
            expr = m(expr)
        return expr

    import loopy as lp
    from pytools import MinRecursionLimit

    # Deeply nested expressions can exceed the default recursion limit.
    with MinRecursionLimit(3000):
        result = [
                lp.Assignment(id=None,
                    assignee=name,
                    expression=convert_expr(name, expr),
                    temp_var_type=lp.Optional(None))
                for name, expr in assignments]

    logger.info("loopy instruction generation: done")
    return result
def get_kernel_scaling_assignment(self):
    """Return a single-element list containing the loopy assignment that
    sets ``kernel_scaling`` to the expansion kernel's global scaling
    constant."""
    from sumpy.symbolic import SympyToPymbolicMapper
    to_pymbolic = SympyToPymbolicMapper()

    scaling_const = self.expansion.kernel.get_global_scaling_const()
    assignment = lp.Assignment(
            id=None,
            assignee="kernel_scaling",
            expression=to_pymbolic(scaling_const),
            temp_var_type=lp.Optional(None))
    return [assignment]
def get_kernel_scaling_assignments(self):
    """Return one loopy assignment per kernel, setting ``knl_<i>_scaling``
    to kernel *i*'s global scaling constant with the matching value dtype."""
    from sumpy.symbolic import SympyToPymbolicMapper
    sympy_conv = SympyToPymbolicMapper()

    import loopy as lp

    scaling_insns = []
    for i, (kernel, dtype) in enumerate(zip(self.kernels, self.value_dtypes)):
        scaling_insns.append(
                lp.Assignment(
                    id=None,
                    assignee="knl_%d_scaling" % i,
                    expression=sympy_conv(kernel.get_global_scaling_const()),
                    temp_var_type=lp.Optional(dtype)))
    return scaling_insns
def get_kernel_exprs(self, result_names):
    """Build ``pair_result_<i>`` assignments, one per result name, each
    weighting the result variable by the source strength (if applicable)."""
    isrc = var("isrc")

    assignments = []
    for i, name in enumerate(result_names):
        weighted = var(name) * self.get_strength_or_not(isrc, i)
        assignments.append(
                lp.Assignment(
                    id=None,
                    assignee="pair_result_%d" % i,
                    expression=weighted,
                    temp_var_type=lp.Optional(None)))
    return assignments
def get_kernel_exprs(self, result_names):
    """Build ``pair_result_<i>`` assignments, weighting each result by the
    source strength and, when ``self.exclude_self`` is set, zeroing
    source-target self-interactions via an ``is_self`` guard."""
    from pymbolic import var
    isrc = var("isrc")

    weighted_exprs = []
    for i, name in enumerate(result_names):
        weighted_exprs.append(var(name) * self.get_strength_or_not(isrc, i))

    if self.exclude_self:
        from pymbolic.primitives import If, Variable
        weighted_exprs = [
                If(Variable("is_self"), 0, expr)
                for expr in weighted_exprs]

    assignments = []
    for i, expr in enumerate(weighted_exprs):
        assignments.append(
                lp.Assignment(
                    id=None,
                    assignee="pair_result_%d" % i,
                    expression=expr,
                    temp_var_type=lp.Optional(None)))
    return assignments
def make_kernel(self, map_instructions, tmp_instructions, args, domains,
        **kwargs):
    """Build a loopy translation unit from temporary and output instructions.

    :arg map_instructions: instructions producing output fields; each is
        either a :class:`loopy.InstructionBase` or an
        ``(assignee, expression)`` pair.
    :arg tmp_instructions: instructions producing temporary variables, in
        the same two forms.
    :arg args: explicit kernel arguments, extended with inferred field
        arguments.
    :arg domains: loop domains forwarded to :func:`loopy.make_kernel`.
    :returns: the finished translation unit, with untyped arguments filled
        in with ``self.dtype``, unused arguments removed, and ``round``
        registered as an OpenCL callable.
    """
    temp_statements = []
    temp_vars = []

    from pystella.field import index_fields
    indexed_tmp_insns = index_fields(tmp_instructions)
    indexed_map_insns = index_fields(map_instructions)

    for statement in indexed_tmp_insns:
        if isinstance(statement, lp.InstructionBase):
            temp_statements += [statement]
        else:
            assignee, expression = statement
            # only declare temporary variables once
            if isinstance(assignee, pp.Variable):
                current_tmp = assignee
            elif isinstance(assignee, pp.Subscript):
                current_tmp = assignee.aggregate
            else:
                current_tmp = None
            if current_tmp is not None and current_tmp not in temp_vars:
                temp_vars += [current_tmp]
                tvt = lp.Optional(None)
            else:
                tvt = lp.Optional()
            temp_statements += [
                self._assignment(assignee, expression, temp_var_type=tvt)
            ]

    output_statements = []
    for statement in indexed_map_insns:
        if isinstance(statement, lp.InstructionBase):
            output_statements += [statement]
        else:
            assignee, expression = statement
            # Fix: output assignments belong in output_statements; they were
            # previously appended to temp_statements, placing them ahead of
            # any InstructionBase output statements in the kernel.
            output_statements += [self._assignment(assignee, expression)]

    options = kwargs.pop("options", lp.Options())
    # ignore lack of supposed dependency for single-instruction kernels
    if len(map_instructions) + len(tmp_instructions) == 1:
        options.check_dep_resolution = False

    from pystella import get_field_args
    inferred_args = get_field_args([map_instructions, tmp_instructions])
    all_args = append_new_args(args, inferred_args)

    t_unit = lp.make_kernel(
        domains,
        temp_statements + output_statements,
        all_args + [lp.ValueArg("Nx, Ny, Nz", dtype="int"), ...],
        options=options,
        **kwargs,
    )

    # Any argument loopy could not type gets the solver's default dtype.
    new_args = []
    knl = t_unit.default_entrypoint
    for arg in knl.args:
        if isinstance(arg, lp.KernelArgument) and arg.dtype is None:
            new_args.append(arg.copy(dtype=self.dtype))
        else:
            new_args.append(arg)
    t_unit = t_unit.with_kernel(knl.copy(args=new_args))

    t_unit = lp.remove_unused_arguments(t_unit)
    t_unit = lp.register_callable(t_unit, "round", UnaryOpenCLCallable("round"))
    return t_unit
def map_insn_assign(self, insn):
    """Lower a grudge assignment instruction to a loopy kernel.

    Single operator bindings and external function calls are returned
    unchanged, as are all-scalar assignments (which need no kernel).
    Everything else becomes an elementwise kernel over element/DOF indices,
    wrapped in a :class:`LoopyKernelInstruction` with input/output mappings
    split by whether each mapped expression depends on an assigned name.
    """
    from grudge.symbolic.primitives import OperatorBinding

    # Operator bindings and external calls are handled by other mappers.
    if (
            len(insn.exprs) == 1
            and (
                isinstance(insn.exprs[0], OperatorBinding)
                or is_external_call(
                    insn.exprs[0], self.function_registry))):
        return insn

    # FIXME: These names and the size names could clash with user-given names.
    # Need better metadata tracking in loopy.
    iel = "iel"
    idof = "idof"

    # Assignees flagged "do not return" become kernel-local temporaries.
    temp_names = [
        name
        for name, dnr in zip(insn.names, insn.do_not_return)
        if dnr]

    from pymbolic import var
    expr_mapper = ToLoopyExpressionMapper(
        self.dd_inference_mapper, temp_names, (var(iel), var(idof)))

    import loopy as lp
    # (Fix: removed a redundant second "from pymbolic import var" here.)

    insns = []
    for name, expr, dnr in zip(insn.names, insn.exprs, insn.do_not_return):
        insns.append(
            lp.Assignment(
                expr_mapper(var(name)),
                expr_mapper(expr),
                temp_var_type=lp.Optional(None) if dnr else lp.Optional(),
                no_sync_with=frozenset([
                    ("*", "any"),
                ]),
            ))

    # Purely scalar assignments do not need a kernel at all.
    if not expr_mapper.non_scalar_vars:
        return insn

    knl = lp.make_kernel(
        "{[%(iel)s, %(idof)s]: "
        "0 <= %(iel)s < nelements and 0 <= %(idof)s < nunit_dofs}"
        % {"iel": iel, "idof": idof},
        insns,
        name="grudge_assign_%d" % self.insn_count,
        # Single-insn kernels may have their no_sync_with resolve to an
        # empty set, that's OK.
        options=lp.Options(
            check_dep_resolution=False,
            return_dict=True,
            no_numpy=True,
        )
    )

    self.insn_count += 1

    from pytools import single_valued
    governing_dd = single_valued(
        self.dd_inference_mapper(expr) for expr in insn.exprs)

    knl = lp.register_preamble_generators(knl, [bessel_preamble_generator])
    knl = lp.register_function_manglers(knl, [bessel_function_mangler])

    input_mappings = {}
    output_mappings = {}

    from grudge.symbolic.mappers import DependencyMapper
    dep_mapper = DependencyMapper(composite_leaves=False)

    for expr, name in expr_mapper.expr_to_name.items():
        deps = dep_mapper(expr)
        assert len(deps) <= 1
        if not deps:
            is_output = False
        else:
            dep, = deps
            # An expression depending on an assigned name is an output.
            is_output = dep.name in insn.names

        if is_output:
            tgt_dict = output_mappings
        else:
            tgt_dict = input_mappings

        tgt_dict[name] = expr

    return LoopyKernelInstruction(
        LoopyKernelDescriptor(
            loopy_kernel=knl,
            input_mappings=input_mappings,
            output_mappings=output_mappings,
            fixed_arguments={},
            governing_dd=governing_dd)
        )
def build_kernel_exterior_normalizer_table(self, cl_ctx, queue,
        pool=None, ncpus=None, mesh_order=5, quad_order=10,
        mesh_size=0.03, remove_tmp_files=True, **kwargs):
    r"""Build the kernel exterior normalizer table for fractional Laplacians.

    An exterior normalizer for kernel :math:`G(r)` and target :math:`x` is
    defined as

    .. math:: \int_{B^c} G(\lVert x - y \rVert) dy

    where :math:`B` is the source box :math:`[0, source_box_extent]^dim`.

    In 1D the integral is evaluated in closed form. In 2D the exterior is
    split at a bounding circle: the coin-shaped region between the box and
    the circle is integrated numerically on a gmsh-generated mesh, and the
    circle's exterior is reduced to a 1D angular quadrature. 3D is not yet
    implemented.
    """
    logger.warn("this method is currently under construction.")

    if not self.inverse_droste:
        raise ValueError()

    if ncpus is None:
        import multiprocessing
        ncpus = multiprocessing.cpu_count()

    if pool is None:
        from multiprocessing import Pool
        pool = Pool(ncpus)

    def fl_scaling(k, s):
        # scaling constant
        from scipy.special import gamma
        return (2**(2 * s) * s * gamma(s + k / 2)) / (
            np.pi**(k / 2) * gamma(1 - s))

    # Directly compute and return in 1D
    if self.dim == 1:
        s = self.integral_knl.s
        targets = np.array(self.q_points).reshape(-1)
        r1 = targets
        r2 = self.source_box_extent - targets
        self.kernel_exterior_normalizers = 1 / (2 * s) * (
            1 / r1**(2 * s) + 1 / r2**(2 * s)) * fl_scaling(k=self.dim, s=s)
        return

    from meshmode.array_context import PyOpenCLArrayContext
    from meshmode.dof_array import thaw, flatten
    from meshmode.mesh.io import read_gmsh
    from meshmode.discretization import Discretization
    from meshmode.discretization.poly_element import \
        PolynomialWarpAndBlendGroupFactory

    # {{{ gmsh processing

    import gmsh
    gmsh.initialize()
    gmsh.option.setNumber("General.Terminal", 1)

    # meshmode does not support other versions
    gmsh.option.setNumber("Mesh.MshFileVersion", 2)

    gmsh.option.setNumber("Mesh.CharacteristicLengthMax", mesh_size)
    gmsh.option.setNumber("Mesh.ElementOrder", mesh_order)
    if mesh_order > 1:
        gmsh.option.setNumber("Mesh.CharacteristicLengthFromCurvature", 1)

    # radius of source box
    hs = self.source_box_extent / 2
    # radius of bouding sphere
    r = hs * np.sqrt(self.dim)

    logger.debug("r_inner = %f, r_outer = %f" % (hs, r))

    if self.dim == 2:
        tag_box = gmsh.model.occ.addRectangle(
            x=0, y=0, z=0, dx=2 * hs, dy=2 * hs, tag=-1)
    elif self.dim == 3:
        tag_box = gmsh.model.occ.addBox(
            x=0, y=0, z=0, dx=2 * hs, dy=2 * hs, dz=2 * hs, tag=-1)
    else:
        raise NotImplementedError()

    if self.dim == 2:
        tag_ball = gmsh.model.occ.addDisk(
            xc=hs, yc=hs, zc=0, rx=r, ry=r, tag=-1)
    elif self.dim == 3:
        tag_sphere = gmsh.model.occ.addSphere(
            xc=hs, yc=hs, zc=hs, radius=r, tag=-1)
        tag_ball = gmsh.model.occ.addVolume([tag_sphere], tag=-1)
    else:
        raise NotImplementedError()

    # "Lucky coin" region: ball minus box.
    dimtags_ints, dimtags_map_ints = gmsh.model.occ.cut(
        objectDimTags=[(self.dim, tag_ball)],
        toolDimTags=[(self.dim, tag_box)],
        tag=-1, removeObject=True, removeTool=True)

    gmsh.model.occ.synchronize()
    gmsh.model.mesh.generate(self.dim)

    from tempfile import mkdtemp
    from os.path import join
    temp_dir = mkdtemp(prefix="tmp_volumential_nft")
    msh_filename = join(temp_dir, 'chinese_lucky_coin.msh')
    gmsh.write(msh_filename)
    gmsh.finalize()

    mesh = read_gmsh(msh_filename)

    if remove_tmp_files:
        import shutil
        shutil.rmtree(temp_dir)

    # }}} End gmsh processing

    arr_ctx = PyOpenCLArrayContext(queue)
    discr = Discretization(
        arr_ctx, mesh,
        PolynomialWarpAndBlendGroupFactory(order=quad_order))

    from pytential import bind, sym

    # {{{ optional checks

    if 1:
        # Sanity check: the mesh's measure should match ball minus box.
        if self.dim == 2:
            arerr = np.abs(
                (np.pi * r**2 - (2 * hs)**2)
                - bind(discr, sym.integral(self.dim, self.dim, 1))(queue)
                ) / (np.pi * r**2 - (2 * hs)**2)
            if arerr > 1e-12:
                log_to = logger.warn
            else:
                log_to = logger.debug
            log_to("the numerical error when computing the measure of a "
                   "unit ball is %e" % arerr)

        elif self.dim == 3:
            arerr = np.abs(
                (4 / 3 * np.pi * r**3 - (2 * hs)**3)
                - bind(discr, sym.integral(self.dim, self.dim, 1))(queue)
                ) / (4 / 3 * np.pi * r**3 - (2 * hs)**3)
            if arerr > 1e-12:
                log_to = logger.warn
            else:
                log_to = logger.debug
            # Fix: previously called logger.warn unconditionally, ignoring
            # the severity selected above (inconsistent with the 2D branch).
            log_to(
                "The numerical error when computing the measure of a "
                "unit ball is %e" % arerr)

    # }}} End optional checks

    # {{{ kernel evaluation

    # TODO: take advantage of symmetry if this is too slow

    from volumential.droste import InverseDrosteReduced

    # only for getting kernel evaluation related stuff
    drf = InverseDrosteReduced(
        self.integral_knl, self.quad_order, self.interaction_case_vecs,
        n_brick_quad_points=0, knl_symmetry_tags=[], auto_windowing=False)

    # uses "dist[dim]", assigned to "knl_val"
    knl_insns = drf.get_sumpy_kernel_insns()

    eval_kernel_insns = [
        insn.copy(within_inames=insn.within_inames | frozenset(["iqpt"]))
        for insn in knl_insns
    ]

    from sumpy.symbolic import SympyToPymbolicMapper
    sympy_conv = SympyToPymbolicMapper()
    scaling_assignment = lp.Assignment(
        id=None,
        assignee="knl_scaling",
        expression=sympy_conv(self.integral_knl.get_global_scaling_const()),
        temp_var_type=lp.Optional(),
    )

    extra_kernel_kwarg_types = ()
    if "extra_kernel_kwarg_types" in kwargs:
        extra_kernel_kwarg_types = kwargs["extra_kernel_kwarg_types"]

    lpknl = lp.make_kernel(  # NOQA
        "{ [iqpt, iaxis]: 0<=iqpt<n_q_points and 0<=iaxis<dim }",
        ["""
        for iqpt
            for iaxis
                <> dist[iaxis] = (quad_points[iaxis, iqpt]
                                  - target_point[iaxis])
            end
        end
        """]
        + eval_kernel_insns
        + [scaling_assignment]
        + ["""
        for iqpt
            result[iqpt] = knl_val * knl_scaling
        end
        """],
        [
            lp.ValueArg("dim, n_q_points", np.int32),
            lp.GlobalArg("quad_points", np.float64, "dim, n_q_points"),
            lp.GlobalArg("target_point", np.float64, "dim"),
        ] + list(extra_kernel_kwarg_types) + [
            "...",
        ],
        name="eval_kernel_lucky_coin",
        lang_version=(2018, 2),
    )

    lpknl = lp.fix_parameters(lpknl, dim=self.dim)
    lpknl = lp.set_options(lpknl, write_cl=False)
    lpknl = lp.set_options(lpknl, return_dict=True)

    # }}} End kernel evaluation

    node_coords = flatten(thaw(arr_ctx, discr.nodes()))
    nodes = cl.array.to_device(
        queue, np.vstack([crd.get() for crd in node_coords]))

    # Integrate the kernel over the coin region, one target at a time.
    int_vals = []
    for target in self.q_points:
        evt, res = lpknl(queue, quad_points=nodes, target_point=target)
        knl_vals = res['result']
        integ = bind(
            discr,
            sym.integral(self.dim, self.dim, sym.var("integrand")))(
                queue, integrand=knl_vals)
        queue.finish()
        int_vals.append(integ)

    int_vals_coins = np.array(int_vals)

    int_vals_inf = np.zeros(self.n_q_points)

    # {{{ integrate over the exterior of the ball

    if self.dim == 2:
        def rho_0(theta, target, radius):
            # Distance from the target to the circle along direction theta.
            rho_x = np.linalg.norm(target, ord=2)
            return (-1 * rho_x * np.cos(theta)
                    + np.sqrt(radius**2 - rho_x**2 * (np.sin(theta)**2)))

        def ext_inf_integrand(theta, s, target, radius):
            _rho_0 = rho_0(theta, target=target, radius=radius)
            return _rho_0**(-2 * s)

        def compute_ext_inf_integral(target, s, radius):
            # target: target point
            # s: fractional order
            # radius: radius of the circle
            import scipy.integrate as sint
            val, _ = sint.quadrature(
                partial(ext_inf_integrand, s=s, target=target, radius=radius),
                a=0, b=2 * np.pi)
            return val * (1 / (2 * s)) * fl_scaling(k=self.dim, s=s)

        if 1:
            # optional test: centered target has a closed-form answer
            target = [0, 0]
            s = 0.5
            radius = 1
            scaling = fl_scaling(k=self.dim, s=s)
            val = compute_ext_inf_integral(target, s, radius)
            test_err = np.abs(
                val
                - radius**(-2 * s) * 2 * np.pi * (1 / (2 * s)) * scaling
                ) / (radius**(-2 * s) * 2 * np.pi * (1 / (2 * s)) * scaling)
            if test_err > 1e-12:
                logger.warn("Error evaluating at origin = %f" % test_err)

        for tid, target in enumerate(self.q_points):
            # The formula assumes that the source box is centered at origin
            int_vals_inf[tid] = compute_ext_inf_integral(
                target=target - hs, s=self.integral_knl.s, radius=r)

    elif self.dim == 3:
        # FIXME
        raise NotImplementedError("3D not yet implemented.")

    else:
        raise NotImplementedError("Unsupported dimension")

    # }}} End integrate over the exterior of the ball

    self.kernel_exterior_normalizers = int_vals_coins + int_vals_inf
    return
def map_insn_assign(self, insn):
    """Lower a grudge assignment instruction to a loopy kernel.

    Single operator bindings and external function calls are returned
    unchanged, as are all-scalar assignments (no kernel is needed for
    those). Everything else becomes an elementwise loopy kernel over one
    flat index, returned as a :class:`LoopyKernelInstruction` whose
    input/output mappings are split by whether each mapped expression
    depends on an assigned name.
    """
    from grudge.symbolic.primitives import OperatorBinding

    # Operator bindings and external calls are handled by other mappers.
    if (len(insn.exprs) == 1
            and (isinstance(insn.exprs[0], OperatorBinding)
                or is_external_call(insn.exprs[0], self.function_registry))):
        return insn

    # NOTE(review): these fixed names could clash with user-given names;
    # presumably acceptable here — confirm against the rest of the compiler.
    iname = "grdg_i"
    size_name = "grdg_n"

    # Assignees flagged "do not return" become kernel-local temporaries.
    temp_names = [
        name
        for name, dnr in zip(insn.names, insn.do_not_return)
        if dnr
    ]

    expr_mapper = ToLoopyExpressionMapper(
        self.dd_inference_mapper, temp_names, iname)
    insns = []

    import loopy as lp
    from pymbolic import var
    for name, expr, dnr in zip(insn.names, insn.exprs, insn.do_not_return):
        insns.append(
            lp.Assignment(
                expr_mapper(var(name)),
                expr_mapper(expr),
                # "do not return" assignees get an untyped temporary;
                # returned assignees are left to loopy's type inference.
                temp_var_type=lp.Optional(None) if dnr else lp.Optional(),
                no_sync_with=frozenset([
                    ("*", "any"),
                ]),
            ))

    # Purely scalar assignments need no kernel at all.
    if not expr_mapper.non_scalar_vars:
        return insn

    knl = lp.make_kernel(
        "{[%s]: 0 <= %s < %s}" % (iname, iname, size_name),
        insns,
        default_offset=lp.auto,
        name="grudge_assign_%d" % self.insn_count,
        # Single-insn kernels may have their no_sync_with resolve to an
        # empty set, that's OK.
        options=lp.Options(check_dep_resolution=False))

    knl = lp.set_options(knl, return_dict=True)
    # GPU-style work distribution over the flat index.
    knl = lp.split_iname(knl, iname, 128, outer_tag="g.0", inner_tag="l.0")

    self.insn_count += 1

    from pytools import single_valued
    governing_dd = single_valued(
        self.dd_inference_mapper(expr) for expr in insn.exprs)

    knl = lp.register_preamble_generators(knl, [bessel_preamble_generator])
    knl = lp.register_function_manglers(knl, [bessel_function_mangler])

    input_mappings = {}
    output_mappings = {}

    from grudge.symbolic.mappers import DependencyMapper
    dep_mapper = DependencyMapper(composite_leaves=False)

    for expr, name in six.iteritems(expr_mapper.expr_to_name):
        deps = dep_mapper(expr)
        # Each mapped expression references at most one variable.
        assert len(deps) <= 1
        if not deps:
            is_output = False
        else:
            dep, = deps
            # An expression depending on an assigned name is an output.
            is_output = dep.name in insn.names

        if is_output:
            tgt_dict = output_mappings
        else:
            tgt_dict = input_mappings

        tgt_dict[name] = expr

    return LoopyKernelInstruction(
        LoopyKernelDescriptor(
            loopy_kernel=knl,
            input_mappings=input_mappings,
            output_mappings=output_mappings,
            fixed_arguments={},
            governing_dd=governing_dd))