def make_common_subexpression(field, prefix=None):
    """Wrap *field* (a scalar expression or an object array of them) in
    :class:`CommonSubexpression` nodes.

    Zero entries pass through as the plain constant 0; non-zero entries are
    wrapped, optionally named using *prefix* plus the entry's index.
    """
    from pytools.obj_array import log_shape
    from hedge.tools import is_zero
    from pymbolic.primitives import CommonSubexpression

    shape = log_shape(field)
    if shape == ():
        # Scalar case: a single wrap, no indexing.
        return 0 if is_zero(field) else CommonSubexpression(field, prefix)

    from pytools import indices_in_shape
    wrapped = numpy.zeros(shape, dtype=object)
    for idx in indices_in_shape(shape):
        if prefix is None:
            name = None
        else:
            name = prefix + "_".join(str(ax) for ax in idx)

        entry = field[idx]
        if is_zero(entry):
            wrapped[idx] = 0
        else:
            wrapped[idx] = CommonSubexpression(entry, name)
    return wrapped
def map_quad_int_faces_grid_upsampler(self, op, expr):
    """Upsample the evaluated *expr* onto the interior-face quadrature
    grid selected by *op*'s quadrature tag (CUDA backend).

    Returns the constant 0 for zero fields, otherwise a GPU vector of
    size ``quad_info.int_face_vector_size``.
    """
    field = self.rec(expr)
    discr = self.executor.discr

    from hedge.tools import is_zero
    if is_zero(field):
        # Zero in, zero out -- skip the GPU work entirely.
        return 0

    quad_info = discr.get_cuda_quadrature_info(op.quadrature_tag)

    # NOTE(review): this method mixes self.discr and self.executor.discr --
    # presumably the same object; confirm.
    result = self.discr._empty_gpuarray(
            quad_info.int_face_vector_size,
            dtype=field.dtype)

    for eg in self.discr.element_groups:
        eg_quad_info = discr.get_cuda_elgroup_quadrature_info(
                eg, op.quadrature_tag)

        # Image size: quadrature nodes per face times faces per element.
        kernel = discr.element_local_kernel(
                image_dofs_per_el=eg_quad_info.ldis_quad_info.face_node_count()
                    * eg.local_discretization.face_count(),
                aligned_image_dofs_per_microblock=eg_quad_info
                    .aligned_int_face_dofs_per_microblock)

        # Prepared interpolation matrices are cached per (group, op, dtype).
        try:
            prepared_matrix = \
                    self.executor.elwise_linear_cache[eg, op, field.dtype]
        except KeyError:
            prepared_matrix = kernel.prepare_matrix(
                    eg_quad_info.ldis_quad_info
                    .volume_to_face_up_interpolation_matrix())
            self.executor.elwise_linear_cache[eg, op, field.dtype] = \
                    prepared_matrix

        kernel(field, prepared_matrix, out_vector=result)

    return result
def map_quad_grid_upsampler(self, op, expr):
    """Interpolate the evaluated *expr* onto the volume quadrature grid
    selected by *op*'s quadrature tag (CUDA backend).
    """
    field = self.rec(expr)
    discr = self.executor.discr

    from hedge.tools import is_zero
    if is_zero(field):
        # A zero field upsamples to zero.
        return 0

    quad_info = discr.get_cuda_quadrature_info(op.quadrature_tag)
    out = self.discr._empty_gpuarray(
            quad_info.volume_vector_size, dtype=field.dtype)

    cache = self.executor.elwise_linear_cache
    for grp in self.discr.element_groups:
        grp_qinfo = discr.get_cuda_elgroup_quadrature_info(
                grp, op.quadrature_tag)
        kernel = discr.element_local_kernel(
                image_dofs_per_el=grp_qinfo.ldis_quad_info.node_count(),
                aligned_image_dofs_per_microblock=
                    grp_qinfo.aligned_dofs_per_microblock)

        # Prepared matrices are cached per (element group, op, dtype).
        cache_key = (grp, op, field.dtype)
        try:
            prep_mat = cache[cache_key]
        except KeyError:
            prep_mat = kernel.prepare_matrix(
                    grp_qinfo.ldis_quad_info.volume_up_interpolation_matrix())
            cache[cache_key] = prep_mat

        kernel(field, prep_mat, out_vector=out)

    return out
def map_elementwise_linear(self, op, expr):
    """Apply the elementwise-linear operator *op* to the evaluated *expr*,
    returning 0 unchanged for zero fields."""
    field = self.rec(expr)
    from hedge.tools import is_zero
    if is_zero(field):
        return 0

    kernel = self.executor.discr.element_local_kernel()

    # FIXME: wouldn't volume_empty suffice?
    result = self.discr.volume_zeros(dtype=field.dtype)

    cache = self.executor.elwise_linear_cache
    for grp in self.discr.element_groups:
        # Prepared matrices are cached per (element group, op, dtype).
        try:
            prep_mat = cache[grp, op, field.dtype]
        except KeyError:
            prep_mat = kernel.prepare_matrix(op.matrix(grp))
            assert op.coefficients(grp) is None, \
                    "per-element scaling of elementwise linear ops is no " \
                    "longer supported"
            cache[grp, op, field.dtype] = prep_mat

        kernel(field, prep_mat, out_vector=result)

    return result
def bind_one(subexpr):
    """Bind the enclosing operator to one component, passing zeros through."""
    if not is_zero(subexpr):
        from hedge.optemplate.primitives import OperatorBinding
        return OperatorBinding(self, subexpr)
    return subexpr
def map_field_component(self, expr):
    """Map a flux FieldComponent to the variable holding its data.

    Interior components read from ``int_field_expr`` (name prefix "a"),
    exterior ones from ``ext_field_expr`` (prefix "b").  Components whose
    underlying expression is zero map to the constant 0.
    """
    if expr.is_interior:
        prefix = "a"
        f_expr = self.int_field_expr
    else:
        prefix = "b"
        f_expr = self.ext_field_expr

    from hedge.tools import is_obj_array, is_zero
    from pymbolic import var

    # Pick out the indexed component of an object array; a scalar field
    # expression may only be "indexed" with 0.
    # (Previously the is_zero/var tail below was duplicated in both
    # branches -- deduplicated, behavior unchanged.)
    if is_obj_array(f_expr):
        f_expr = f_expr[expr.index]
    else:
        assert expr.index == 0, repr(f_expr)

    if is_zero(f_expr):
        return 0
    return var("val_%s_field%d" % (prefix, self.dep_to_index[f_expr]))
def get_flux_dependencies(flux, field, bdry="all"):
    """Yield the non-zero field expressions that *flux* depends on.

    For a BoundaryPair *field*, interior dependencies come from the volume
    side and exterior ones from the boundary side; *bdry* restricts which
    sides are reported ("all", "int" or "ext").
    """
    from hedge.flux import FluxDependencyMapper, FieldComponent
    in_fields = list(FluxDependencyMapper(include_calls=False)(flux))

    # check that all in_fields are FieldComponent instances
    assert not [in_field
            for in_field in in_fields
            if not isinstance(in_field, FieldComponent)]

    def maybe_index(fld, index):
        # FIX: previously indexed with the enclosing loop variable
        # ('fld[inf.index]') instead of the 'index' parameter, which only
        # worked because every call site happened to pass inf.index.
        from hedge.tools import is_obj_array
        if is_obj_array(fld):
            return fld[index]
        else:
            return fld

    from hedge.tools import is_zero
    from hedge.optemplate import BoundaryPair
    if isinstance(field, BoundaryPair):
        for inf in in_fields:
            if inf.is_interior:
                if bdry in ["all", "int"]:
                    value = maybe_index(field.field, inf.index)
                    if not is_zero(value):
                        yield value
            else:
                if bdry in ["all", "ext"]:
                    value = maybe_index(field.bfield, inf.index)
                    if not is_zero(value):
                        yield value
    else:
        for inf in in_fields:
            value = maybe_index(field, inf.index)
            if not is_zero(value):
                yield value
def get_flux_dependencies(flux, field, bdry="all"):
    """Generator for the non-zero field expressions *flux* reads.

    With a BoundaryPair *field*, *bdry* selects which side(s) to report:
    "all" (default), "int" for interior only, or "ext" for exterior only.
    """
    from hedge.flux import FluxDependencyMapper, FieldComponent
    in_fields = list(FluxDependencyMapper(include_calls=False)(flux))

    # check that all in_fields are FieldComponent instances
    assert not [
        in_field for in_field in in_fields
        if not isinstance(in_field, FieldComponent)
    ]

    def maybe_index(fld, index):
        # FIX: used to read the loop variable 'inf' from the enclosing
        # scope rather than its own 'index' parameter.
        from hedge.tools import is_obj_array
        if is_obj_array(fld):
            return fld[index]
        else:
            return fld

    from hedge.tools import is_zero
    from hedge.optemplate import BoundaryPair
    if isinstance(field, BoundaryPair):
        for inf in in_fields:
            if inf.is_interior:
                if bdry in ["all", "int"]:
                    value = maybe_index(field.field, inf.index)
                    if not is_zero(value):
                        yield value
            else:
                if bdry in ["all", "ext"]:
                    value = maybe_index(field.bfield, inf.index)
                    if not is_zero(value):
                        yield value
    else:
        for inf in in_fields:
            value = maybe_index(field, inf.index)
            if not is_zero(value):
                yield value
def map_quad_bdry_grid_upsampler(self, op, expr):
    """Upsample boundary data in *expr* onto the boundary quadrature grid
    for *op*'s quadrature tag (CUDA backend).

    Returns 0 for zero fields, otherwise a GPU vector sized to the
    aligned boundary DOF count of the quadrature grid.
    """
    field = self.rec(expr)
    discr = self.executor.discr

    from hedge.tools import is_zero
    if is_zero(field):
        return 0

    quad_info = discr.get_cuda_quadrature_info(
            op.quadrature_tag)

    result = self.discr._empty_gpuarray(
            quad_info.face_storage_info.aligned_boundary_dof_count,
            dtype=field.dtype)

    for eg in self.discr.element_groups:
        eqi = discr.get_cuda_elgroup_quadrature_info(
                eg, op.quadrature_tag)

        # Boundary data is processed one face per "element":
        # elements_per_microblock=1, with the microblock count derived
        # from the aligned boundary storage sizes.
        # NOTE(review): the preimage alignment reads discr.face_storage_info
        # while the image side reads quad_info.face_storage_info --
        # presumably nodal vs. quadrature storage; confirm this asymmetry
        # is intentional.
        kernel = discr.element_local_kernel(
                aligned_preimage_dofs_per_microblock
                    =discr.face_storage_info.aligned_boundary_dofs_per_face,
                preimage_dofs_per_el
                    =eg.local_discretization.face_node_count(),
                aligned_image_dofs_per_microblock
                    =quad_info.face_storage_info.aligned_boundary_dofs_per_face,
                image_dofs_per_el
                    =eqi.ldis_quad_info.face_node_count(),
                elements_per_microblock=1,
                microblock_count
                    =quad_info.face_storage_info.aligned_boundary_dof_count//
                    quad_info.face_storage_info.aligned_boundary_dofs_per_face)

        # Prepared face interpolation matrices cached per (group, op, dtype).
        try:
            prepared_matrix = \
                    self.executor.elwise_linear_cache[eg, op, field.dtype]
        except KeyError:
            prepared_matrix = kernel.prepare_matrix(
                    eqi.ldis_quad_info
                    .face_up_interpolation_matrix())
            self.executor.elwise_linear_cache[eg, op, field.dtype] = \
                    prepared_matrix

        kernel(field, prepared_matrix, out_vector=result)

    return result
def __add__(self, update):
    """Return a new state advanced by *update* = (dx, dp, ddep)."""
    from hedge.tools import is_zero
    if is_zero(update):
        # Adding a zero update leaves the state untouched.
        return self

    from pyrticle.tools import NumberShiftableVector

    dx, dp, ddep = update
    advanced = self.method.advance_state(
            self.state,
            NumberShiftableVector.unwrap(dx),
            NumberShiftableVector.unwrap(dp),
            ddep)
    return TimesteppablePicState(self.method, advanced)
def incident_bc(self, w=None):
    """Flux terms for incident boundary conditions."""
    # NOTE: Untested for inhomogeneous materials, but would usually be
    # physically meaningless anyway (are there exceptions to this?)
    e, h = self.split_eh(self.field_placeholder(w))

    if not self.fixed_material:
        from warnings import warn
        if self.incident_tag != hedge.mesh.TAG_NONE:
            warn("Incident boundary conditions assume homogeneous"
                    " background material, results may be unphysical")

    from hedge.tools import count_subset, is_zero
    fld_cnt = count_subset(self.get_eh_subset())

    bc_data = self.incident_bc_data(self, e, h)
    if is_zero(bc_data):
        # No incident field: one zero per enabled E/H component.
        return make_obj_array([0] * fld_cnt)
    return cse(-bc_data)
def incident_bc(self, w=None): "Flux terms for incident boundary conditions" # NOTE: Untested for inhomogeneous materials, but would usually be # physically meaningless anyway (are there exceptions to this?) e, h = self.split_eh(self.field_placeholder(w)) if not self.fixed_material: from warnings import warn if self.incident_tag != hedge.mesh.TAG_NONE: warn("Incident boundary conditions assume homogeneous" " background material, results may be unphysical") from hedge.tools import count_subset fld_cnt = count_subset(self.get_eh_subset()) from hedge.tools import is_zero incident_bc_data = self.incident_bc_data(self, e, h) if is_zero(incident_bc_data): return make_obj_array([0] * fld_cnt) else: return cse(-incident_bc_data)
def __call__(self, operators, field):
    """Apply the differentiation *operators* (which share one matrix set)
    to *field*, returning one volume vector per operator's rst_axis."""
    # All operators in the batch share matrices; use the first as template.
    rep_op = operators[0]

    from hedge.tools import is_zero
    from pytools import to_uncomplex_dtype

    accumulators = [self.discr.volume_zeros(dtype=field.dtype)
            for _ in range(self.discr.dimensions)]

    if not is_zero(field):
        real_dtype = to_uncomplex_dtype(field.dtype)
        for grp in self.discr.element_groups:
            mats = rep_op.matrices(grp)
            diff_routine = self.make_diff(grp, field.dtype, mats[0].shape)
            diff_routine(*(
                    [rep_op.preimage_ranges(grp), grp.ranges, field]
                    + [m.astype(real_dtype) for m in mats]
                    + accumulators))

    return [accumulators[op.rst_axis] for op in operators]
def finalize_multi_assign(self, names, exprs, do_not_return, priority):
    """Package (names, exprs) into an instruction: a plain Assign when the
    batch is a zero constant or costs no flops, a VectorExprAssign otherwise.
    """
    from pytools import any
    from hedge.tools import is_zero

    zero_present = any(is_zero(e) for e in exprs)
    if zero_present and len(exprs) > 1:
        raise RuntimeError("found aggregated zero constant assignment")

    from hedge.optemplate import FlopCounter
    total_flops = sum(FlopCounter()(e) for e in exprs)

    if zero_present or total_flops == 0:
        # No real computation: a simple Assign suffices.
        return Assign(names, exprs, priority=priority,
                dep_mapper_factory=self.dep_mapper_factory)

    return VectorExprAssign(names=names, exprs=exprs,
            do_not_return=do_not_return,
            dep_mapper_factory=self.dep_mapper_factory,
            priority=priority)
def __call__(self, operators, field):
    """Apply each differentiation operator in *operators* to *field*.

    The operators are assumed to share a matrix set; only the first
    ("representative") operator's matrices and ranges are used.  Returns
    a list with one volume vector per operator, chosen by rst_axis.
    """
    # pick a "representative operator"
    rep_op = operators[0]

    # one accumulator per reference-space axis
    result = [self.discr.volume_zeros(dtype=field.dtype)
            for i in range(self.discr.dimensions)]

    from hedge.tools import is_zero
    if not is_zero(field):
        for eg in self.discr.element_groups:
            from pytools import to_uncomplex_dtype
            # matrices are applied with the real (uncomplex) dtype
            uncomplex_dtype = to_uncomplex_dtype(field.dtype)
            matrices = rep_op.matrices(eg)
            # args: ranges, input field, matrices, then output accumulators
            args = (
                [rep_op.preimage_ranges(eg), eg.ranges, field]
                + [m.astype(uncomplex_dtype) for m in matrices]
                + result
            )
            diff_routine = self.make_diff(eg, field.dtype, matrices[0].shape)
            diff_routine(*args)

    return [result[op.rst_axis] for op in operators]
def finalize_multi_assign(self, names, exprs, do_not_return, priority):
    """Turn an aggregated (names, exprs) batch into an instruction.

    Zero-constant or zero-flop batches become a plain Assign; everything
    else becomes a VectorExprAssign.
    """
    from pytools import any
    from hedge.tools import is_zero
    has_zero_assignees = any(is_zero(expr) for expr in exprs)
    if has_zero_assignees:
        if len(exprs) > 1:
            # zero constants must never be aggregated with real work
            raise RuntimeError("found aggregated zero constant assignment")

    from hedge.optemplate import FlopCounter
    flop_count = sum(FlopCounter()(expr) for expr in exprs)

    if has_zero_assignees or flop_count == 0:
        return Assign(names, exprs, priority=priority,
                dep_mapper_factory=self.dep_mapper_factory)
    else:
        return VectorExprAssign(
            names=names, exprs=exprs,
            do_not_return=do_not_return,
            dep_mapper_factory=self.dep_mapper_factory,
            priority=priority,
        )
def map_operator_binding(self, expr):
    """Rewrite a flux operator applied to a BoundaryPair.

    Non-flux bindings are passed through unchanged.  For flux bindings,
    the boundary field is decomposed into maximal flux-evaluable
    subexpressions (Step 1), those are substituted into the flux
    (Step II), and a new flux operator binding over the collected
    volume/boundary expression lists is returned.  A flux that turns
    out to be zero yields the constant 0.
    """
    from hedge.optemplate.operators import FluxOperatorBase
    from hedge.optemplate.primitives import BoundaryPair
    from hedge.flux import FluxSubstitutionMapper, FieldComponent

    if not (isinstance(expr.op, FluxOperatorBase)
            and isinstance(expr.field, BoundaryPair)):
        # not a flux-on-boundary-pair binding: default treatment
        return IdentityMapper.map_operator_binding(self, expr)

    bpair = expr.field
    vol_field = bpair.field
    bdry_field = bpair.bfield
    flux = expr.op.flux

    bdry_dependencies = DependencyMapper(
            include_calls="descend_args",
            include_operator_bindings=True)(bdry_field)

    vol_dependencies = DependencyMapper(
            include_operator_bindings=True)(vol_field)

    # A variable may not feed both sides of the boundary pair.
    vol_bdry_intersection = bdry_dependencies & vol_dependencies
    if vol_bdry_intersection:
        raise RuntimeError(
                "Variables are being used as both "
                "boundary and volume quantities: %s"
                % ", ".join(str(v) for v in vol_bdry_intersection))

    # Step 1: Find maximal flux-evaluable subexpression of boundary field
    # in given BoundaryPair.

    class MaxBoundaryFluxEvaluableExpressionFinder(IdentityMapper,
            OperatorReducerMixin):
        # Collects volume/boundary subexpressions into indexed lists and
        # replaces them by FieldComponent placeholders in the flux.

        def __init__(self, vol_expr_list, expensive_bdry_op_detector):
            self.vol_expr_list = vol_expr_list
            self.vol_expr_to_idx = dict(
                    (vol_expr, idx)
                    for idx, vol_expr in enumerate(vol_expr_list))
            self.bdry_expr_list = []
            self.bdry_expr_to_idx = {}
            self.expensive_bdry_op_detector = expensive_bdry_op_detector

        # {{{ expression registration

        def register_boundary_expr(self, expr):
            # idempotent: repeated registration returns the same index
            try:
                return self.bdry_expr_to_idx[expr]
            except KeyError:
                idx = len(self.bdry_expr_to_idx)
                self.bdry_expr_to_idx[expr] = idx
                self.bdry_expr_list.append(expr)
                return idx

        def register_volume_expr(self, expr):
            try:
                return self.vol_expr_to_idx[expr]
            except KeyError:
                idx = len(self.vol_expr_to_idx)
                self.vol_expr_to_idx[expr] = idx
                self.vol_expr_list.append(expr)
                return idx

        # }}}

        # {{{ map_xxx routines

        @memoize_method
        def map_common_subexpression(self, expr):
            # Here we need to decide whether this CSE should be turned into
            # a flux CSE or not. This is a good idea if the transformed
            # expression only contains "bare" volume or boundary
            # expressions. However, as soon as an operator is applied
            # somewhere in the subexpression, the CSE should not be touched
            # in order to avoid redundant evaluation of that operator.
            #
            # Observe that at the time of this writing (Feb 2010), the only
            # operators that may occur in boundary expressions are
            # quadrature-related.
            has_expensive_operators = \
                    self.expensive_bdry_op_detector(expr.child)

            if has_expensive_operators:
                return FieldComponent(self.register_boundary_expr(expr),
                        is_interior=False)
            else:
                return IdentityMapper.map_common_subexpression(self, expr)

        def map_normal(self, expr):
            raise RuntimeError(
                    "Your operator template contains a flux normal. "
                    "You may find this confusing, but you can't do that. "
                    "It turns out that you need to use "
                    "hedge.optemplate.make_normal() for normals in boundary "
                    "terms of operator templates.")

        def map_normal_component(self, expr):
            if expr.boundary_tag != bpair.tag:
                raise RuntimeError(
                        "BoundaryNormalComponent and BoundaryPair "
                        "do not agree about boundary tag: %s vs %s"
                        % (expr.boundary_tag, bpair.tag))

            from hedge.flux import Normal
            return Normal(expr.axis)

        def map_variable(self, expr):
            # bare variables in the boundary field become boundary deps
            return FieldComponent(self.register_boundary_expr(expr),
                    is_interior=False)

        map_subscript = map_variable

        def map_operator_binding(self, expr):
            from hedge.optemplate import (BoundarizeOperator,
                    FluxExchangeOperator,
                    QuadratureGridUpsampler,
                    QuadratureBoundaryGridUpsampler)

            if isinstance(expr.op, BoundarizeOperator):
                if expr.op.tag != bpair.tag:
                    raise RuntimeError(
                            "BoundarizeOperator and BoundaryPair "
                            "do not agree about boundary tag: %s vs %s"
                            % (expr.op.tag, bpair.tag))

                # Boundarized volume data counts as an interior dependency.
                return FieldComponent(self.register_volume_expr(
                    expr.field), is_interior=True)

            elif isinstance(expr.op, FluxExchangeOperator):
                from hedge.mesh import TAG_RANK_BOUNDARY
                op_tag = TAG_RANK_BOUNDARY(expr.op.rank)
                if bpair.tag != op_tag:
                    raise RuntimeError(
                            "BoundarizeOperator and "
                            "FluxExchangeOperator do not agree about "
                            "boundary tag: %s vs %s"
                            % (op_tag, bpair.tag))
                return FieldComponent(self.register_boundary_expr(expr),
                        is_interior=False)

            elif isinstance(expr.op, QuadratureBoundaryGridUpsampler):
                if bpair.tag != expr.op.boundary_tag:
                    raise RuntimeError(
                            "BoundarizeOperator "
                            "and QuadratureBoundaryGridUpsampler "
                            "do not agree about boundary tag: %s vs %s"
                            % (expr.op.boundary_tag, bpair.tag))
                return FieldComponent(self.register_boundary_expr(expr),
                        is_interior=False)

            elif isinstance(expr.op, QuadratureGridUpsampler):
                # We're invoked before operator specialization, so we may
                # see these instead of QuadratureBoundaryGridUpsampler.
                return FieldComponent(self.register_boundary_expr(expr),
                        is_interior=False)

            else:
                raise RuntimeError(
                        "Found '%s' in a boundary term. "
                        "To the best of my knowledge, no hedge operator applies "
                        "directly to boundary data, so this is likely in error."
                        % expr.op)

        def map_flux_exchange(self, expr):
            return FieldComponent(self.register_boundary_expr(expr),
                    is_interior=False)

        # }}}

    from hedge.tools import is_obj_array
    if not is_obj_array(vol_field):
        vol_field = [vol_field]

    mbfeef = MaxBoundaryFluxEvaluableExpressionFinder(
            list(vol_field), self.expensive_bdry_op_detector)
    #from hedge.optemplate.tools import pretty
    #print pretty(bdry_field)
    #raw_input("YO")
    new_bdry_field = mbfeef(bdry_field)

    # Step II: Substitute the new_bdry_field into the flux.
    def sub_bdry_into_flux(expr):
        if isinstance(expr, FieldComponent) and not expr.is_interior:
            if expr.index == 0 and not is_obj_array(bdry_field):
                return new_bdry_field
            else:
                return new_bdry_field[expr.index]
        else:
            # None tells the substitution mapper to leave expr alone
            return None

    new_flux = FluxSubstitutionMapper(sub_bdry_into_flux)(flux)

    from hedge.tools import is_zero, make_obj_array
    if is_zero(new_flux):
        return 0
    else:
        # Rebuild the flux operator with the new flux, recursing into the
        # collected volume and boundary expression lists.
        return type(expr.op)(new_flux, *expr.op.__getinitargs__()[1:])(
                BoundaryPair(
                    make_obj_array([self.rec(e) for e in mbfeef.vol_expr_list]),
                    make_obj_array(
                        [self.rec(e) for e in mbfeef.bdry_expr_list]),
                    bpair.tag))
def aggregate_assignments(self, instructions, result):
    """Greedily merge related Assign instructions into multi-assignments.

    Zero-flop and zero-constant assignments are set aside untouched;
    remaining Assigns with shared dependencies/assignees and equal
    priority are merged (subject to max_vectors_in_batch_expr and
    circularity checks), then each merged batch is internally scheduled
    and finalized.  Returns the new instruction list.
    """
    from pymbolic.primitives import Variable

    # aggregation helpers -------------------------------------------------
    def get_complete_origins_set(insn, skip_levels=0):
        # transitively collect the instructions whose results *insn* reads,
        # skipping the first skip_levels levels of the dependency chain
        if skip_levels < 0:
            skip_levels = 0

        result = set()
        for dep in insn.get_dependencies():
            if isinstance(dep, Variable):
                dep_origin = origins_map.get(dep.name, None)
                if dep_origin is not None:
                    if skip_levels <= 0:
                        result.add(dep_origin)
                    result |= get_complete_origins_set(
                            dep_origin, skip_levels-1)
        return result

    var_assignees_cache = {}

    def get_var_assignees(insn):
        # memoized set of Variable() wrappers for insn's assignees
        try:
            return var_assignees_cache[insn]
        except KeyError:
            result = set(Variable(assignee)
                    for assignee in insn.get_assignees())
            var_assignees_cache[insn] = result
            return result

    def aggregate_two_assignments(ass_1, ass_2):
        names = ass_1.names + ass_2.names

        from pymbolic.primitives import Variable
        # internal names are not external dependencies of the merged insn
        deps = (ass_1.get_dependencies() | ass_2.get_dependencies()) \
                - set(Variable(name) for name in names)

        return Assign(
                names=names, exprs=ass_1.exprs + ass_2.exprs,
                _dependencies=deps,
                dep_mapper_factory=self.dep_mapper_factory,
                priority=max(ass_1.priority, ass_2.priority))

    # main aggregation pass -----------------------------------------------
    origins_map = dict(
            (assignee, insn)
            for insn in instructions
            for assignee in insn.get_assignees())

    from pytools import partition
    unprocessed_assigns, other_insns = partition(
            lambda insn: isinstance(insn, Assign), instructions)

    # filter out zero-flop-count assigns--no need to bother with those
    processed_assigns, unprocessed_assigns = partition(
            lambda ass: ass.flop_count() == 0, unprocessed_assigns)

    # filter out zero assignments
    from pytools import any
    from hedge.tools import is_zero

    i = 0
    while i < len(unprocessed_assigns):
        my_assign = unprocessed_assigns[i]
        if any(is_zero(expr) for expr in my_assign.exprs):
            # BUGFIX: was pop() (no argument), which removed the *last*
            # element instead of the zero assignment at index i, moving
            # arbitrary non-zero assigns past aggregation.
            processed_assigns.append(unprocessed_assigns.pop(i))
        else:
            i += 1

    # greedy aggregation
    while unprocessed_assigns:
        my_assign = unprocessed_assigns.pop()

        my_deps = my_assign.get_dependencies()
        my_assignees = get_var_assignees(my_assign)

        # candidates: same priority and some data relationship
        agg_candidates = []
        for i, other_assign in enumerate(unprocessed_assigns):
            other_deps = other_assign.get_dependencies()
            other_assignees = get_var_assignees(other_assign)

            if ((my_deps & other_deps
                    or my_deps & other_assignees
                    or other_deps & my_assignees)
                    and my_assign.priority == other_assign.priority):
                agg_candidates.append((i, other_assign))

        did_work = False

        if agg_candidates:
            my_indirect_origins = get_complete_origins_set(
                    my_assign, skip_levels=1)

            for other_assign_index, other_assign in agg_candidates:
                if self.max_vectors_in_batch_expr is not None:
                    # respect the per-batch vector budget
                    new_assignee_count = len(
                            set(my_assign.get_assignees())
                            | set(other_assign.get_assignees()))
                    new_dep_count = len(
                            my_assign.get_dependencies(
                                each_vector=True)
                            | other_assign.get_dependencies(
                                each_vector=True))

                    if (new_assignee_count + new_dep_count
                            > self.max_vectors_in_batch_expr):
                        continue

                # reject merges that would create an indirect self-dependency
                other_indirect_origins = get_complete_origins_set(
                        other_assign, skip_levels=1)

                if (my_assign not in other_indirect_origins
                        and other_assign not in my_indirect_origins):
                    did_work = True

                    # aggregate the two assignments
                    new_assignment = aggregate_two_assignments(
                            my_assign, other_assign)
                    del unprocessed_assigns[other_assign_index]
                    unprocessed_assigns.append(new_assignment)
                    for assignee in new_assignment.get_assignees():
                        origins_map[assignee] = new_assignment
                    break

        if not did_work:
            processed_assigns.append(my_assign)

    externally_used_names = set(
            expr
            for insn in processed_assigns + other_insns
            for expr in insn.get_dependencies())

    from hedge.tools import is_obj_array
    if is_obj_array(result):
        externally_used_names |= set(expr for expr in result)
    else:
        externally_used_names |= set([result])

    def schedule_and_finalize_assignment(ass):
        # order the merged assignment's components so every intra-batch
        # dependency is computed before its users
        dep_mapper = self.dep_mapper_factory()

        names_exprs = zip(ass.names, ass.exprs)

        my_assignees = set(name for name, expr in names_exprs)
        names_exprs_deps = [
                (name, expr,
                    set(dep.name for dep in dep_mapper(expr)
                        if isinstance(dep, Variable)) & my_assignees)
                for name, expr in names_exprs]

        ordered_names_exprs = []
        available_names = set()

        while names_exprs_deps:
            schedulable = []

            i = 0
            while i < len(names_exprs_deps):
                name, expr, deps = names_exprs_deps[i]

                unsatisfied_deps = deps - available_names

                if not unsatisfied_deps:
                    schedulable.append((str(expr), name, expr))
                    del names_exprs_deps[i]
                else:
                    i += 1

            # make sure these come out in a constant order
            schedulable.sort()

            if schedulable:
                for key, name, expr in schedulable:
                    ordered_names_exprs.append((name, expr))
                    available_names.add(name)
            else:
                raise RuntimeError("aggregation resulted in an "
                        "impossible assignment")

        return self.finalize_multi_assign(
                names=[name for name, expr in ordered_names_exprs],
                exprs=[expr for name, expr in ordered_names_exprs],
                do_not_return=[Variable(name) not in externally_used_names
                    for name, expr in ordered_names_exprs],
                priority=ass.priority)

    return [schedule_and_finalize_assignment(ass)
            for ass in processed_assigns] + other_insns
def nb_bdry_permute(fld):
    # Zero fields need no permutation; otherwise reorder the data into
    # the neighbor rank's boundary numbering.
    return 0 if is_zero(fld) else fld[from_nb_indices]
def __call__(self, eval_dependency, lift_plan):
    """Run the CUDA flux-gather kernel and return the per-flux
    fluxes-on-faces GPU arrays.

    Dependencies are evaluated via *eval_dependency* and bound to
    texture references; zero dependencies are materialized as zero
    vectors.  Timing/debugging paths are controlled by
    discr.instrumented and discr.debug.
    """
    discr = self.discr
    fplan = self.plan
    given = fplan.given
    elgroup, = discr.element_groups

    # one output buffer per flux
    all_fluxes_on_faces = [
            gpuarray.empty(given.matmul_preimage_shape(lift_plan),
                dtype=given.float_type,
                allocator=discr.pool.allocate)
            for i in range(len(self.fluxes))]

    fdata = self.flux_face_data_block(elgroup)
    ilist_data = self.index_list_data()

    block, gather, texref_map = self.get_kernel(fdata, ilist_data,
            for_benchmark=False)

    for dep_expr in self.all_deps:
        dep_field = eval_dependency(dep_expr)

        from hedge.tools import is_zero
        if is_zero(dep_field):
            # materialize zeros with the right (boundary/volume) layout
            if dep_expr in self.dep_to_tag:
                dep_field = discr.boundary_zeros(self.dep_to_tag[dep_expr])
            else:
                dep_field = discr.volume_zeros()

        assert dep_field.dtype == given.float_type, \
                "Wrong types: %s: %s, %s: %s" % (
                dep_expr, dep_field.dtype, given, given.float_type)
        # inputs are read through textures
        dep_field.bind_to_texref_ext(texref_map[dep_expr],
                allow_double_hack=True)

    if set(["cuda_flux", "cuda_debugbuf"]) <= discr.debug:
        debugbuf = gpuarray.zeros((10000, ), dtype=given.float_type)
    else:
        from hedge.backends.cuda.tools import FakeGPUArray
        debugbuf = FakeGPUArray()

    if discr.instrumented:
        # timed launch plus global-memory traffic accounting
        discr.flux_gather_timer.add_timer_callable(
                gather.prepared_timed_call(
                    (len(discr.blocks), 1), block,
                    debugbuf.gpudata,
                    fdata.device_memory,
                    *tuple(fof.gpudata for fof in all_fluxes_on_faces)))

        discr.gmem_bytes_gather.add(
                len(discr.blocks) * fdata.block_bytes
                + given.float_size() * (
                    # fetch
                    len(self.fluxes)
                    * 2 * fdata.fp_count
                    * fplan.dofs_per_face

                    # store
                    + len(discr.blocks)
                    * len(self.fluxes)
                    * fplan.microblocks_per_block()
                    * fplan.aligned_face_dofs_per_microblock()))
    else:
        gather.prepared_call(
                (len(discr.blocks), 1), block,
                debugbuf.gpudata,
                fdata.device_memory,
                *tuple(fof.gpudata for fof in all_fluxes_on_faces))

    if set(["cuda_flux", "cuda_debugbuf"]) <= discr.debug:
        from hedge.tools import get_rank, wait_for_keypress
        if get_rank(discr) == 0:
            copied_debugbuf = debugbuf.get()
            print "DEBUG", len(discr.blocks)
            numpy.set_printoptions(linewidth=130)
            #print numpy.reshape(copied_debugbuf, (32, 16))
            print copied_debugbuf[:50]

            #for i in range(len(discr.blocks)*6):
                #print i, copied_debugbuf[i*16:(i+1)*16]
                #print i, [x-10000 for x in sorted(copied_debugbuf[i*16:(i+1)*16]) if x != 0]

            wait_for_keypress(discr)

    if "cuda_flux" in discr.debug:
        from hedge.tools import get_rank, wait_for_keypress
        if get_rank(discr) == 0:
            numpy.set_printoptions(linewidth=130, precision=2,
                    threshold=10**6)

            if True:
                # ASCII-art structure dump of each flux-on-faces vector
                cols = []
                for k in range(len(all_fluxes_on_faces)):
                    my_fof = all_fluxes_on_faces[k].get()

                    def sstruc(a):
                        result = ""
                        for i in a:
                            if i == 0:
                                result += "0"
                            elif abs(i) < 1e-10:
                                result += "-"
                            elif numpy.isnan(i):
                                result += "N"
                            elif i == 17:
                                result += "*"
                            else:
                                result += "#"
                        return result

                    useful_sz = given.block_count \
                            * given.microblocks_per_block \
                            * lift_plan.aligned_preimage_dofs_per_microblock

                    my_col = []
                    i = 0
                    while i < useful_sz:
                        my_col.append(sstruc(my_fof[i:i + 16]))
                        i += 16

                    cols.append(my_col)

                from pytools import Table
                tbl = Table()
                tbl.add_row(["num"] + range(len(cols)))
                i = 0
                for row in zip(*cols):
                    tbl.add_row((i, ) + row)
                    i += 1
                print tbl
            else:
                for i in range(len(all_fluxes_on_faces)):
                    print i
                    print all_fluxes_on_faces[i].get()
                wait_for_keypress(discr)

                #print "B", [la.norm(fof.get()) for fof in all_fluxes_on_faces]

    return all_fluxes_on_faces
def op_template(self, sensor_scaling=None, viscosity_only=False):
    """Assemble the symbolic operator template for the (Navier-Stokes
    style) conservation law: first-order flux part, second-order
    viscous part, optional artificial diffusion, extra terms, and the
    characteristic speed as an additional output field.

    With viscosity_only=True the first-order part is zeroed out.
    """
    # NOTE(review): several locals below (u, rho, rho_u, p, e, sqrt,
    # has_viscosity, primitive_bcs_as_quad_conservative) appear unused in
    # this method -- possibly kept for side effects or in-progress work;
    # confirm before removing.
    u = self.cse_u
    rho = self.cse_rho
    rho_u = self.rho_u
    p = self.p
    e = self.e

    # {{{ artificial diffusion
    def make_artificial_diffusion():
        # sensor-scaled diffusion of the state gradient, one entry per
        # conserved quantity; disabled unless mode is "diffusion"
        if self.artificial_viscosity_mode not in ["diffusion"]:
            return 0

        dq = self.grad_of_state()

        return make_obj_array([
            self.div(
                to_vol_quad(self.sensor())*to_vol_quad(dq[i]),
                to_int_face_quad(self.sensor())*to_int_face_quad(dq[i]))
            for i in range(dq.shape[0])])
    # }}}

    # {{{ state setup

    volq_flux = self.flux(self.volq_state())
    faceq_flux = self.flux(self.faceq_state())

    from hedge.optemplate.primitives import CFunction
    sqrt = CFunction("sqrt")

    speed = self.characteristic_velocity_optemplate(self.state())

    has_viscosity = not is_zero(self.get_mu(self.state(), to_quad_op=None))

    # }}}

    # {{{ operator assembly -----------------------------------------------
    from hedge.flux.tools import make_lax_friedrichs_flux
    from hedge.optemplate.operators import InverseMassOperator

    from hedge.optemplate.tools import make_stiffness_t

    primitive_bcs_as_quad_conservative = dict(
            (tag, self.primitive_to_conservative(to_bdry_quad(bc)))
            for tag, bc in
            self.get_primitive_boundary_conditions().iteritems())

    def get_bc_tuple(tag):
        state = self.state()
        bc = make_obj_array([
            self.get_boundary_condition_for(tag, s_i) for s_i in state])
        return tag, bc, self.flux(bc)

    # weak-form first-order part with Lax-Friedrichs interface fluxes
    first_order_part = InverseMassOperator()(
            numpy.dot(make_stiffness_t(self.dimensions), volq_flux)
            - make_lax_friedrichs_flux(
                wave_speed=cse(to_int_face_quad(speed), "emax_c"),
                state=self.faceq_state(), fluxes=faceq_flux,
                bdry_tags_states_and_fluxes=[
                    get_bc_tuple(tag) for tag in self.get_boundary_tags()],
                strong=False))

    if viscosity_only:
        first_order_part = 0*first_order_part

    result = join_fields(
            first_order_part
            + self.make_second_order_part()
            + make_artificial_diffusion()
            + self.make_extra_terms(),
            speed)

    if self.source is not None:
        result = result + join_fields(
                make_sym_vector("source_vect", len(self.state())),
                # extra field for speed
                0)

    return result
def op_template(self, sensor_scaling=None, viscosity_only=False):
    """Build the symbolic operator template: first-order fluxes,
    second-order (viscous) part, optional artificial diffusion and extra
    terms, with the characteristic speed appended as an extra field.

    viscosity_only=True suppresses the first-order part.
    """
    # NOTE(review): u, rho, rho_u, p, e, sqrt, has_viscosity and
    # primitive_bcs_as_quad_conservative are assigned but not read here;
    # confirm whether they are dead code before removing.
    u = self.cse_u
    rho = self.cse_rho
    rho_u = self.rho_u
    p = self.p
    e = self.e

    # {{{ artificial diffusion
    def make_artificial_diffusion():
        # sensor-scaled diffusion of grad(state); active only in
        # "diffusion" mode
        if self.artificial_viscosity_mode not in ["diffusion"]:
            return 0

        dq = self.grad_of_state()

        return make_obj_array([
            self.div(
                to_vol_quad(self.sensor()) * to_vol_quad(dq[i]),
                to_int_face_quad(self.sensor()) * to_int_face_quad(dq[i]))
            for i in range(dq.shape[0])
        ])
    # }}}

    # {{{ state setup

    volq_flux = self.flux(self.volq_state())
    faceq_flux = self.flux(self.faceq_state())

    from hedge.optemplate.primitives import CFunction
    sqrt = CFunction("sqrt")

    speed = self.characteristic_velocity_optemplate(self.state())

    has_viscosity = not is_zero(self.get_mu(self.state(), to_quad_op=None))

    # }}}

    # {{{ operator assembly -----------------------------------------------
    from hedge.flux.tools import make_lax_friedrichs_flux
    from hedge.optemplate.operators import InverseMassOperator

    from hedge.optemplate.tools import make_stiffness_t

    primitive_bcs_as_quad_conservative = dict(
            (tag, self.primitive_to_conservative(to_bdry_quad(bc)))
            for tag, bc in
            self.get_primitive_boundary_conditions().iteritems())

    def get_bc_tuple(tag):
        state = self.state()
        bc = make_obj_array(
            [self.get_boundary_condition_for(tag, s_i) for s_i in state])
        return tag, bc, self.flux(bc)

    # weak first-order part with Lax-Friedrichs interface fluxes
    first_order_part = InverseMassOperator()(
            numpy.dot(make_stiffness_t(self.dimensions), volq_flux)
            - make_lax_friedrichs_flux(
                wave_speed=cse(to_int_face_quad(speed), "emax_c"),
                state=self.faceq_state(), fluxes=faceq_flux,
                bdry_tags_states_and_fluxes=[
                    get_bc_tuple(tag) for tag in self.get_boundary_tags()
                ],
                strong=False))

    if viscosity_only:
        first_order_part = 0 * first_order_part

    result = join_fields(
            first_order_part
            + self.make_second_order_part()
            + make_artificial_diffusion()
            + self.make_extra_terms(),
            speed)

    if self.source is not None:
        result = result + join_fields(
                make_sym_vector("source_vect", len(self.state())),
                # extra field for speed
                0)

    return result
def __call__(self, eval_dependency, lift_plan):
    """Launch the CUDA flux-gather kernel; return the list of
    fluxes-on-faces GPU arrays, one per flux.

    Zero dependencies are replaced by zero vectors of the proper
    (boundary or volume) layout; all inputs are bound to texture
    references before launch.
    """
    discr = self.discr
    fplan = self.plan
    given = fplan.given
    elgroup, = discr.element_groups

    # one output buffer per flux
    all_fluxes_on_faces = [gpuarray.empty(
            given.matmul_preimage_shape(lift_plan),
            dtype=given.float_type,
            allocator=discr.pool.allocate)
            for i in range(len(self.fluxes))]

    fdata = self.flux_face_data_block(elgroup)
    ilist_data = self.index_list_data()

    block, gather, texref_map = self.get_kernel(fdata, ilist_data,
            for_benchmark=False)

    for dep_expr in self.all_deps:
        dep_field = eval_dependency(dep_expr)

        from hedge.tools import is_zero
        if is_zero(dep_field):
            # materialize zeros with the right storage layout
            if dep_expr in self.dep_to_tag:
                dep_field = discr.boundary_zeros(self.dep_to_tag[dep_expr])
            else:
                dep_field = discr.volume_zeros()

        assert dep_field.dtype == given.float_type
        # inputs are read through textures
        dep_field.bind_to_texref_ext(texref_map[dep_expr],
                allow_double_hack=True)

    if set(["cuda_flux", "cuda_debugbuf"]) <= discr.debug:
        debugbuf = gpuarray.zeros((10000,), dtype=given.float_type)
    else:
        from hedge.backends.cuda.tools import FakeGPUArray
        debugbuf = FakeGPUArray()

    if discr.instrumented:
        # timed launch plus global-memory traffic accounting
        discr.flux_gather_timer.add_timer_callable(
                gather.prepared_timed_call(
                    (len(discr.blocks), 1), block,
                    debugbuf.gpudata,
                    fdata.device_memory,
                    *tuple(fof.gpudata for fof in all_fluxes_on_faces)
                    ))

        discr.gmem_bytes_gather.add(
                len(discr.blocks) * fdata.block_bytes
                + given.float_size() * (
                    # fetch
                    len(self.fluxes)
                    * 2*fdata.fp_count
                    * fplan.dofs_per_face

                    # store
                    + len(discr.blocks)
                    * len(self.fluxes)
                    * fplan.microblocks_per_block()
                    * fplan.aligned_face_dofs_per_microblock()
                    ))
    else:
        gather.prepared_call(
                (len(discr.blocks), 1), block,
                debugbuf.gpudata,
                fdata.device_memory,
                *tuple(fof.gpudata for fof in all_fluxes_on_faces)
                )

    if set(["cuda_flux", "cuda_debugbuf"]) <= discr.debug:
        from hedge.tools import get_rank, wait_for_keypress
        if get_rank(discr) == 0:
            copied_debugbuf = debugbuf.get()
            print "DEBUG", len(discr.blocks)
            numpy.set_printoptions(linewidth=130)
            #print numpy.reshape(copied_debugbuf, (32, 16))
            print copied_debugbuf[:50]

            #for i in range(len(discr.blocks)*6):
                #print i, copied_debugbuf[i*16:(i+1)*16]
                #print i, [x-10000 for x in sorted(copied_debugbuf[i*16:(i+1)*16]) if x != 0]

            wait_for_keypress(discr)

    if "cuda_flux" in discr.debug:
        from hedge.tools import get_rank, wait_for_keypress
        if get_rank(discr) == 0:
            numpy.set_printoptions(linewidth=130, precision=2,
                    threshold=10**6)

            if True:
                # ASCII-art structure dump of each flux-on-faces vector
                cols = []
                for k in range(len(all_fluxes_on_faces)):
                    my_fof = all_fluxes_on_faces[k].get()

                    def sstruc(a):
                        result = ""
                        for i in a:
                            if i == 0:
                                result += "0"
                            elif abs(i) < 1e-10:
                                result += "-"
                            elif numpy.isnan(i):
                                result += "N"
                            elif i == 17:
                                result += "*"
                            else:
                                result += "#"
                        return result

                    useful_sz = given.block_count \
                            * given.microblocks_per_block \
                            * lift_plan.aligned_preimage_dofs_per_microblock

                    my_col = []
                    i = 0
                    while i < useful_sz:
                        my_col.append(sstruc(my_fof[i:i+16]))
                        i += 16

                    cols.append(my_col)

                from pytools import Table
                tbl = Table()
                tbl.add_row(["num"]+range(len(cols)))
                i = 0
                for row in zip(*cols):
                    tbl.add_row((i,)+row)
                    i += 1
                print tbl
            else:
                for i in range(len(all_fluxes_on_faces)):
                    print i
                    print all_fluxes_on_faces[i].get()
                wait_for_keypress(discr)

                #print "B", [la.norm(fof.get()) for fof in all_fluxes_on_faces]

    return all_fluxes_on_faces
def map_operator_binding(self, expr):
    """Rewrite a flux operator applied to a :class:`BoundaryPair`.

    The boundary field of the pair is scanned for its maximal
    flux-evaluable subexpressions; those are registered (by index) as
    boundary or volume inputs, and the flux expression is rewritten in
    terms of :class:`FieldComponent` references to these indices.
    Bindings that are not a flux operator on a BoundaryPair are passed
    on to the base-class handler unchanged.

    :return: 0 if the substituted flux is identically zero, otherwise a
        new flux operator binding on a rebuilt BoundaryPair.
    :raises RuntimeError: if a variable is used as both a boundary and
        a volume quantity, or if boundary tags disagree.
    """
    from hedge.optemplate.operators import FluxOperatorBase
    from hedge.optemplate.primitives import BoundaryPair
    from hedge.flux import FluxSubstitutionMapper, FieldComponent

    if not (isinstance(expr.op, FluxOperatorBase)
            and isinstance(expr.field, BoundaryPair)):
        return IdentityMapper.map_operator_binding(self, expr)

    bpair = expr.field
    vol_field = bpair.field
    bdry_field = bpair.bfield
    flux = expr.op.flux

    bdry_dependencies = DependencyMapper(
            include_calls="descend_args",
            include_operator_bindings=True)(bdry_field)

    vol_dependencies = DependencyMapper(
            include_operator_bindings=True)(vol_field)

    # a variable may not feed both sides of the flux
    vol_bdry_intersection = bdry_dependencies & vol_dependencies
    if vol_bdry_intersection:
        raise RuntimeError("Variables are being used as both "
                "boundary and volume quantities: %s"
                % ", ".join(str(v) for v in vol_bdry_intersection))

    # Step 1: Find maximal flux-evaluable subexpression of boundary field
    # in given BoundaryPair.

    class MaxBoundaryFluxEvaluableExpressionFinder(
            IdentityMapper, OperatorReducerMixin):
        # Closes over `bpair` above to validate boundary tags.

        def __init__(self, vol_expr_list, expensive_bdry_op_detector):
            self.vol_expr_list = vol_expr_list
            self.vol_expr_to_idx = dict((vol_expr, idx)
                    for idx, vol_expr in enumerate(vol_expr_list))

            self.bdry_expr_list = []
            self.bdry_expr_to_idx = {}

            self.expensive_bdry_op_detector = expensive_bdry_op_detector

        # {{{ expression registration

        def register_boundary_expr(self, expr):
            # Return the index of `expr` in the boundary input list,
            # adding it if not yet present.
            try:
                return self.bdry_expr_to_idx[expr]
            except KeyError:
                idx = len(self.bdry_expr_to_idx)
                self.bdry_expr_to_idx[expr] = idx
                self.bdry_expr_list.append(expr)
                return idx

        def register_volume_expr(self, expr):
            # Return the index of `expr` in the volume input list,
            # adding it if not yet present.
            try:
                return self.vol_expr_to_idx[expr]
            except KeyError:
                idx = len(self.vol_expr_to_idx)
                self.vol_expr_to_idx[expr] = idx
                self.vol_expr_list.append(expr)
                return idx

        # }}}

        # {{{ map_xxx routines

        @memoize_method
        def map_common_subexpression(self, expr):
            # Here we need to decide whether this CSE should be turned into
            # a flux CSE or not. This is a good idea if the transformed
            # expression only contains "bare" volume or boundary
            # expressions. However, as soon as an operator is applied
            # somewhere in the subexpression, the CSE should not be touched
            # in order to avoid redundant evaluation of that operator.
            #
            # Observe that at the time of this writing (Feb 2010), the only
            # operators that may occur in boundary expressions are
            # quadrature-related.

            has_expensive_operators = \
                    self.expensive_bdry_op_detector(expr.child)

            if has_expensive_operators:
                return FieldComponent(
                        self.register_boundary_expr(expr),
                        is_interior=False)
            else:
                return IdentityMapper.map_common_subexpression(self, expr)

        def map_normal(self, expr):
            raise RuntimeError("Your operator template contains a flux normal. "
                    "You may find this confusing, but you can't do that. "
                    "It turns out that you need to use "
                    "hedge.optemplate.make_normal() for normals in boundary "
                    "terms of operator templates.")

        def map_normal_component(self, expr):
            if expr.boundary_tag != bpair.tag:
                raise RuntimeError("BoundaryNormalComponent and BoundaryPair "
                        "do not agree about boundary tag: %s vs %s"
                        % (expr.boundary_tag, bpair.tag))

            from hedge.flux import Normal
            return Normal(expr.axis)

        def map_variable(self, expr):
            # bare variables in the boundary field become boundary inputs
            return FieldComponent(
                    self.register_boundary_expr(expr),
                    is_interior=False)

        map_subscript = map_variable

        def map_operator_binding(self, expr):
            from hedge.optemplate import (BoundarizeOperator,
                    FluxExchangeOperator,
                    QuadratureGridUpsampler,
                    QuadratureBoundaryGridUpsampler)

            if isinstance(expr.op, BoundarizeOperator):
                if expr.op.tag != bpair.tag:
                    raise RuntimeError("BoundarizeOperator and BoundaryPair "
                            "do not agree about boundary tag: %s vs %s"
                            % (expr.op.tag, bpair.tag))

                # a boundarized volume quantity is a volume input
                return FieldComponent(
                        self.register_volume_expr(expr.field),
                        is_interior=True)

            elif isinstance(expr.op, FluxExchangeOperator):
                from hedge.mesh import TAG_RANK_BOUNDARY
                op_tag = TAG_RANK_BOUNDARY(expr.op.rank)
                if bpair.tag != op_tag:
                    raise RuntimeError("BoundarizeOperator and FluxExchangeOperator "
                            "do not agree about boundary tag: %s vs %s"
                            % (op_tag, bpair.tag))
                return FieldComponent(
                        self.register_boundary_expr(expr),
                        is_interior=False)

            elif isinstance(expr.op, QuadratureBoundaryGridUpsampler):
                if bpair.tag != expr.op.boundary_tag:
                    raise RuntimeError("BoundarizeOperator "
                            "and QuadratureBoundaryGridUpsampler "
                            "do not agree about boundary tag: %s vs %s"
                            % (expr.op.boundary_tag, bpair.tag))
                return FieldComponent(
                        self.register_boundary_expr(expr),
                        is_interior=False)

            elif isinstance(expr.op, QuadratureGridUpsampler):
                # We're invoked before operator specialization, so we may
                # see these instead of QuadratureBoundaryGridUpsampler.
                return FieldComponent(
                        self.register_boundary_expr(expr),
                        is_interior=False)

            else:
                raise RuntimeError("Found '%s' in a boundary term. "
                        "To the best of my knowledge, no hedge operator applies "
                        "directly to boundary data, so this is likely in error."
                        % expr.op)

        def map_flux_exchange(self, expr):
            return FieldComponent(
                    self.register_boundary_expr(expr),
                    is_interior=False)

        # }}}

    from hedge.tools import is_obj_array
    if not is_obj_array(vol_field):
        vol_field = [vol_field]

    mbfeef = MaxBoundaryFluxEvaluableExpressionFinder(list(vol_field),
            self.expensive_bdry_op_detector)

    #from hedge.optemplate.tools import pretty_print_optemplate
    #print pretty_print_optemplate(bdry_field)
    #raw_input("YO")

    new_bdry_field = mbfeef(bdry_field)

    # Step II: Substitute the new_bdry_field into the flux.
    def sub_bdry_into_flux(expr):
        # replace exterior FieldComponents by the rewritten boundary field
        if isinstance(expr, FieldComponent) and not expr.is_interior:
            if expr.index == 0 and not is_obj_array(bdry_field):
                return new_bdry_field
            else:
                return new_bdry_field[expr.index]
        else:
            return None

    new_flux = FluxSubstitutionMapper(sub_bdry_into_flux)(flux)

    from hedge.tools import is_zero, make_obj_array
    if is_zero(new_flux):
        return 0
    else:
        # rebuild the operator with the substituted flux (same remaining
        # init args) and a BoundaryPair of the registered inputs
        return type(expr.op)(new_flux, *expr.op.__getinitargs__()[1:])(
                BoundaryPair(
                    make_obj_array([self.rec(e) for e in mbfeef.vol_expr_list]),
                    make_obj_array([self.rec(e) for e in mbfeef.bdry_expr_list]),
                    bpair.tag))
def aggregate_assignments(self, instructions, result):
    """Greedily merge vector-valued :class:`Assign` instructions.

    Assignments sharing dependencies (or depending on each other's
    assignees) and having equal priority are fused into multi-assignments,
    subject to ``self.max_vectors_in_batch_expr``. Zero-flop and
    zero-valued assignments are passed through unaggregated. Each
    resulting group is internally scheduled so intra-group dependencies
    are honored, then finalized via ``self.finalize_multi_assign``.

    :param instructions: iterable of instructions; non-Assign and
        scalar-valued ones are returned unmodified.
    :param result: the overall result expression (scalar or object
        array); its entries are treated as externally used names.
    :return: list of finalized instructions.
    :raises RuntimeError: if scheduling within a merged assignment is
        impossible (cyclic intra-group dependencies).
    """
    from pymbolic.primitives import Variable

    # {{{ aggregation helpers

    def get_complete_origins_set(insn, skip_levels=0):
        # Transitively collect the instructions that produce insn's
        # dependencies, omitting the first `skip_levels` levels.
        if skip_levels < 0:
            skip_levels = 0

        result = set()
        for dep in insn.get_dependencies():
            if isinstance(dep, Variable):
                dep_origin = origins_map.get(dep.name, None)
                if dep_origin is not None:
                    if skip_levels <= 0:
                        result.add(dep_origin)
                    result |= get_complete_origins_set(
                            dep_origin, skip_levels - 1)
        return result

    var_assignees_cache = {}

    def get_var_assignees(insn):
        # Cached set of insn's assignees, as Variable instances.
        try:
            return var_assignees_cache[insn]
        except KeyError:
            result = set(Variable(assignee)
                    for assignee in insn.get_assignees())
            var_assignees_cache[insn] = result
            return result

    def aggregate_two_assignments(ass_1, ass_2):
        # Merge two Assigns into one; dependencies internal to the
        # merged assignment are dropped.
        names = ass_1.names + ass_2.names

        from pymbolic.primitives import Variable
        deps = (ass_1.get_dependencies() | ass_2.get_dependencies()) \
                - set(Variable(name) for name in names)

        return Assign(
                names=names, exprs=ass_1.exprs + ass_2.exprs,
                _dependencies=deps,
                dep_mapper_factory=self.dep_mapper_factory,
                priority=max(ass_1.priority, ass_2.priority))

    # }}}

    # {{{ main aggregation pass

    # map assignee name -> producing instruction
    origins_map = dict(
            (assignee, insn)
            for insn in instructions
            for assignee in insn.get_assignees())

    from pytools import partition
    unprocessed_assigns, other_insns = partition(
            lambda insn: isinstance(insn, Assign)
                and not insn.is_scalar_valued,
            instructions)

    # filter out zero-flop-count assigns--no need to bother with those
    processed_assigns, unprocessed_assigns = partition(
            lambda ass: ass.flop_count() == 0,
            unprocessed_assigns)

    # filter out zero assignments
    from pytools import any
    from hedge.tools import is_zero

    i = 0
    while i < len(unprocessed_assigns):
        my_assign = unprocessed_assigns[i]
        if any(is_zero(expr) for expr in my_assign.exprs):
            # BUGFIX: pop the assignment we just examined (index i),
            # not list.pop()'s default last element, which moved the
            # wrong instruction to processed_assigns.
            processed_assigns.append(unprocessed_assigns.pop(i))
        else:
            i += 1

    # greedy aggregation
    while unprocessed_assigns:
        my_assign = unprocessed_assigns.pop()

        my_deps = my_assign.get_dependencies()
        my_assignees = get_var_assignees(my_assign)

        # candidates share data (deps or assignees) and priority
        agg_candidates = []
        for i, other_assign in enumerate(unprocessed_assigns):
            other_deps = other_assign.get_dependencies()
            other_assignees = get_var_assignees(other_assign)

            if ((my_deps & other_deps
                    or my_deps & other_assignees
                    or other_deps & my_assignees)
                    and my_assign.priority == other_assign.priority):
                agg_candidates.append((i, other_assign))

        did_work = False

        if agg_candidates:
            my_indirect_origins = get_complete_origins_set(
                    my_assign, skip_levels=1)

            for other_assign_index, other_assign in agg_candidates:
                if self.max_vectors_in_batch_expr is not None:
                    # respect the cap on vectors per batched expression
                    new_assignee_count = len(
                            set(my_assign.get_assignees())
                            | set(other_assign.get_assignees()))
                    new_dep_count = len(
                            my_assign.get_dependencies(each_vector=True)
                            | other_assign.get_dependencies(each_vector=True))

                    if (new_assignee_count + new_dep_count
                            > self.max_vectors_in_batch_expr):
                        continue

                other_indirect_origins = get_complete_origins_set(
                        other_assign, skip_levels=1)

                # only merge when neither (indirectly) depends on the
                # other--merging would otherwise create a cycle
                if (my_assign not in other_indirect_origins
                        and other_assign not in my_indirect_origins):
                    did_work = True

                    # aggregate the two assignments
                    new_assignment = aggregate_two_assignments(
                            my_assign, other_assign)
                    del unprocessed_assigns[other_assign_index]
                    unprocessed_assigns.append(new_assignment)
                    for assignee in new_assignment.get_assignees():
                        origins_map[assignee] = new_assignment

                    break

        if not did_work:
            processed_assigns.append(my_assign)

    # }}}

    # names consumed by other instructions or by the overall result
    externally_used_names = set(
            expr
            for insn in processed_assigns + other_insns
            for expr in insn.get_dependencies())

    from hedge.tools import is_obj_array
    if is_obj_array(result):
        externally_used_names |= set(expr for expr in result)
    else:
        externally_used_names |= set([result])

    def schedule_and_finalize_assignment(ass):
        # Order the (name, expr) pairs inside a merged assignment so
        # that intra-assignment dependencies are satisfied, then hand
        # the ordered batch to finalize_multi_assign.
        dep_mapper = self.dep_mapper_factory()

        # materialize: this pair list is iterated twice below
        names_exprs = list(zip(ass.names, ass.exprs))

        my_assignees = set(name for name, expr in names_exprs)
        names_exprs_deps = [
                (name, expr,
                    set(dep.name for dep in dep_mapper(expr)
                        if isinstance(dep, Variable)) & my_assignees)
                for name, expr in names_exprs]

        ordered_names_exprs = []
        available_names = set()

        while names_exprs_deps:
            schedulable = []

            i = 0
            while i < len(names_exprs_deps):
                name, expr, deps = names_exprs_deps[i]

                unsatisfied_deps = deps - available_names

                if not unsatisfied_deps:
                    schedulable.append((str(expr), name, expr))
                    del names_exprs_deps[i]
                else:
                    i += 1

            # make sure these come out in a constant order
            schedulable.sort()

            if schedulable:
                for key, name, expr in schedulable:
                    ordered_names_exprs.append((name, expr))
                    available_names.add(name)
            else:
                raise RuntimeError("aggregation resulted in an "
                        "impossible assignment")

        return self.finalize_multi_assign(
                names=[name for name, expr in ordered_names_exprs],
                exprs=[expr for name, expr in ordered_names_exprs],
                do_not_return=[Variable(name) not in externally_used_names
                    for name, expr in ordered_names_exprs],
                priority=ass.priority)

    return [schedule_and_finalize_assignment(ass)
        for ass in processed_assigns] + other_insns