def unroll(body_gen, total_number, max_unroll=None, start=0):
    from cgen import For, Line, Block
    from pytools import flatten

    if max_unroll is None:
        max_unroll = total_number

    result = []

    if total_number > max_unroll:
        loop_items = (total_number // max_unroll) * max_unroll

        result.extend([
                For("unsigned j = 0",
                    "j < %d" % loop_items,
                    "j += %d" % max_unroll,
                    Block(list(flatten(
                        body_gen("(j+%d)" % i)
                        for i in range(max_unroll))))),
                Line()
                ])

        start += loop_items

    result.extend(flatten(
        body_gen(i) for i in range(start, total_number)))

    return result

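# A minimal illustration of the `pytools.flatten` helper used throughout these
# snippets (assuming the usual pytools semantics): it lazily flattens exactly
# one level of nesting and yields the elements, which is why callers wrap it
# in list(...) or set(...) whenever a concrete sequence is needed.
from pytools import flatten

nested = [[1, 2], [3], [4, 5]]
assert list(flatten(nested)) == [1, 2, 3, 4, 5]

gen = flatten(nested)   # a generator: consumed once, then exhausted
assert list(gen) == [1, 2, 3, 4, 5]
assert list(gen) == []
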
def __call__(self, *args):
    result = numpy.empty(self.shape, self.result_dtype)

    from pytools import flatten
    self.kernel(result, *tuple(flatten(args)))

    return result

def map_num_reference_derivative(self, expr):
    discr = self.places.get_discretization(
            expr.dofdesc.geometry, expr.dofdesc.discr_stage)

    from pytools import flatten
    ref_axes = flatten([axis] * mult for axis, mult in expr.ref_axes)
    return discr.num_reference_derivative(ref_axes, self.rec(expr.operand))

def generate_icosahedron(r, order):
    # http://en.wikipedia.org/w/index.php?title=Icosahedron&oldid=387737307
    phi = (1+5**(1/2))/2

    from pytools import flatten
    vertices = np.array(sorted(flatten([
            (0, pm1*1, pm2*phi),
            (pm1*1, pm2*phi, 0),
            (pm1*phi, 0, pm2*1)]
            for pm1 in [-1, 1]
            for pm2 in [-1, 1]))).T.copy()

    top_ring = [11, 7, 1, 2, 8]
    bottom_ring = [10, 9, 3, 0, 4]
    bottom_point = 6
    top_point = 5

    tris = []
    l = len(top_ring)
    for i in range(l):
        tris.append([top_ring[i], top_ring[(i+1) % l], top_point])
        tris.append([bottom_ring[i], bottom_point, bottom_ring[(i+1) % l]])
        tris.append([bottom_ring[i], bottom_ring[(i+1) % l], top_ring[i]])
        tris.append([top_ring[i], bottom_ring[(i+1) % l], top_ring[(i+1) % l]])

    vertices *= r/la.norm(vertices[:, 0])

    vertex_indices = np.array(tris, dtype=np.int32)

    grp = make_group_from_vertices(vertices, vertex_indices, order)

    from meshmode.mesh import Mesh
    return Mesh(vertices, [grp], element_connectivity=None)

def index_list_backend(self, ilists):
    from pytools import single_valued
    ilist_length = single_valued(len(il) for il in ilists)
    assert ilist_length == self.plan.dofs_per_face

    from cgen import Typedef, POD
    from pytools import flatten

    flat_ilists_uncast = numpy.array(list(flatten(ilists)))

    if numpy.max(flat_ilists_uncast) >= 256:
        tp = numpy.uint16
    else:
        tp = numpy.uint8

    flat_ilists = numpy.asarray(flat_ilists_uncast, dtype=tp)
    assert (flat_ilists == flat_ilists_uncast).all()

    return GPUIndexLists(
            type=tp,
            code=[Typedef(POD(tp, "index_list_entry_t"))],
            device_memory=cuda.to_device(flat_ilists),
            bytes=flat_ilists.size*flat_ilists.itemsize,
            )

def static_extremum_of_pw_aff(pw_aff, constants_only, set_method, what, context):
    if context is not None:
        context = isl.align_spaces(context, pw_aff.get_domain_space(),
                obj_bigger_ok=True).params()
        pw_aff = pw_aff.gist(context)

    pieces = pw_aff.get_pieces()
    if len(pieces) == 1:
        (_, result), = pieces
        if constants_only and not result.is_cst():
            raise ValueError("a numeric %s was not found for PwAff '%s'"
                    % (what, pw_aff))
        return result

    from pytools import memoize, flatten

    @memoize
    def is_bounded(set):
        assert set.dim(dim_type.set) == 0
        return (set
                .move_dims(dim_type.set, 0,
                    dim_type.param, 0, set.dim(dim_type.param))
                .is_bounded())

    # put constant bounds with unbounded validity first
    order = [
            (True, False),   # constant, unbounded validity
            (False, False),  # nonconstant, unbounded validity
            (True, True),    # constant, bounded validity
            (False, True),   # nonconstant, bounded validity
            ]

    pieces = flatten([
            [(set, aff) for set, aff in pieces
                if aff.is_cst() == want_is_constant
                and is_bounded(set) == want_is_bounded]
            for want_is_constant, want_is_bounded in order])

    reference = pw_aff.get_aggregate_domain()
    if context is not None:
        reference = reference.intersect(context)

    # {{{ find bounds that are also global bounds

    for set, candidate_aff in pieces:
        # gist can be time-consuming, try without first
        for use_gist in [False, True]:
            if use_gist:
                candidate_aff = candidate_aff.gist(set)

            if constants_only and not candidate_aff.is_cst():
                continue

            if reference <= set_method(pw_aff, candidate_aff):
                return candidate_aff

    # }}}

    raise StaticValueFindingError("a static %s was not found for PwAff '%s'"
            % (what, pw_aff))

def get_next_step(self, available_names, done_insns):
    from pytools import all, argmax2
    available_insns = [
            (insn, insn.priority) for insn in self.instructions
            if insn not in done_insns
            and all(dep.name in available_names
                for dep in insn.get_dependencies())]

    if not available_insns:
        raise self.NoInstructionAvailable

    from pytools import flatten
    discardable_vars = set(available_names) - set(flatten(
        [dep.name for dep in insn.get_dependencies()]
        for insn in self.instructions
        if insn not in done_insns))

    # {{{ make sure results do not get discarded

    dm = mappers.DependencyMapper(composite_leaves=False)

    def remove_result_variable(result_expr):
        # The extra dependency mapper run is necessary
        # because, for instance, subscripts can make it
        # into the result expression, which then does
        # not consist of just variables.

        for var in dm(result_expr):
            assert isinstance(var, Variable)
            discardable_vars.discard(var.name)

    obj_array_vectorize(remove_result_variable, self.result)

    # }}}

    return argmax2(available_insns), discardable_vars

def trace(self):
    tensor = np.array([1, 0, 0, 1])
    trace_argument = pytools.flatten(
            [[tensor, [i]] for i in range(self.no_qubits)])
    return np.einsum(self.dm, list(range(self.no_qubits)),
            *trace_argument, optimize=True)

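# A sketch of numpy's interleaved-operand einsum convention that the trace
# above relies on: np.einsum(op0, sublist0, op1, sublist1, ..., [out_sublist]),
# where the sublists are lists of integer axis labels instead of a subscript
# string. The names below are made up for illustration.
import numpy as np

a = np.arange(4).reshape(2, 2)
b = np.eye(2)
# equivalent to np.einsum("ij,jk->ik", a, b)
c = np.einsum(a, [0, 1], b, [1, 2], [0, 2])
assert np.allclose(c, a @ b)
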
def map_num_reference_derivative(self, expr):
    discr = self.bound_expr.get_discretization(expr.dofdesc)

    from pytools import flatten
    ref_axes = flatten([axis] * mult for axis, mult in expr.ref_axes)
    return discr.num_reference_derivative(
            self.queue,
            ref_axes, self.rec(expr.operand)) \
                    .with_queue(self.queue)

def map_num_reference_derivative(self, expr):
    discr = self.bound_expr.get_discretization(expr.where)

    from pytools import flatten
    ref_axes = flatten([axis] * mult for axis, mult in expr.ref_axes)
    return discr.num_reference_derivative(
            self.queue,
            ref_axes, self.rec(expr.operand)) \
                    .with_queue(self.queue)

def get_dependencies(self):
    deps = set()
    for wdflux in self.expressions:
        deps |= set(wdflux.interior_deps)
        deps |= set(wdflux.boundary_deps)

    dep_mapper = self.dep_mapper_factory()

    from pytools import flatten
    return set(flatten(dep_mapper(dep) for dep in deps))

def make_linear_comb_kernel_with_result_dtype(
        result_dtype, scalar_dtypes, vector_dtypes):
    comp_count = len(vector_dtypes)
    from pytools import flatten
    return ElementwiseKernel([VectorArg(result_dtype, "result")] + list(flatten(
            (ScalarArg(scalar_dtypes[i], "a%d_fac" % i),
                VectorArg(vector_dtypes[i], "a%d" % i))
            for i in range(comp_count))),
            "result[i] = " + " + ".join("a%d_fac*a%d[i]" % (i, i)
                for i in range(comp_count)))

def make_linear_comb_kernel_with_result_dtype(result_dtype, scalar_dtypes,
                                              vector_dtypes):
    comp_count = len(vector_dtypes)

    from pytools import flatten
    return ElementwiseKernel(
            [VectorArg(result_dtype, "result")] + list(
                flatten((ScalarArg(scalar_dtypes[i], "a%d_fac" % i),
                         VectorArg(vector_dtypes[i], "a%d" % i))
                        for i in range(comp_count))),
            "result[i] = " + " + ".join("a%d_fac*a%d[i]" % (i, i)
                                        for i in range(comp_count)))

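# For comp_count == 2, the kernel body assembled above reads as follows (a
# hedged sketch of the generated string, not output captured from the library):
comp_count = 2
body = "result[i] = " + " + ".join(
        "a%d_fac*a%d[i]" % (i, i) for i in range(comp_count))
assert body == "result[i] = a0_fac*a0[i] + a1_fac*a1[i]"
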
def forward_metric_nth_derivative(xyz_axis, ref_axes, dd=None):
    r"""
    Pointwise metric derivatives representing repeated derivatives

    .. math::

        \frac{\partial^n x_{\mathrm{xyz\_axis}}}{\partial r_{\mathrm{ref\_axes}}}

    where *ref_axes* is a multi-index description.

    :arg ref_axes: a :class:`tuple` of tuples mapping indices of coordinate
        axes of the reference element to the number of derivatives which
        will be taken. For example, the value ``((0, 2), (1, 1))`` indicates
        taking the second derivative with respect to the first axis and the
        first derivative with respect to the second axis. Each axis must
        occur only once and the tuple must be sorted by the axis index.

        May also be a single integer *i*, which is viewed as equivalent
        to ``((i, 1),)``.
    """
    if isinstance(ref_axes, int):
        ref_axes = ((ref_axes, 1),)

    if not isinstance(ref_axes, tuple):
        raise ValueError("ref_axes must be a tuple")

    if tuple(sorted(ref_axes)) != ref_axes:
        raise ValueError("ref_axes must be sorted")

    if len(dict(ref_axes)) != len(ref_axes):
        raise ValueError("ref_axes must not contain an axis more than once")

    if dd is None:
        dd = DD_VOLUME
    inner_dd = dd.with_qtag(QTAG_NONE)

    from pytools import flatten
    flat_ref_axes = flatten([rst_axis] * n for rst_axis, n in ref_axes)

    from grudge.symbolic.operators import RefDiffOperator
    result = NodeCoordinateComponent(xyz_axis, inner_dd)
    for rst_axis in flat_ref_axes:
        result = RefDiffOperator(rst_axis, inner_dd)(result)

    if dd.uses_quadrature():
        from grudge.symbolic.operators import project
        result = project(inner_dd, dd)(result)

    prefix = "dx%d_%s" % (
            xyz_axis,
            "_".join("%sr%d" % ("d" * n, rst_axis) for rst_axis, n in ref_axes))

    return cse(result, prefix, cse_scope.DISCRETIZATION)

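# A quick check of the ref_axes flattening step used above: the multi-index
# ((0, 2), (1, 1)) expands to one reference-axis entry per derivative, i.e.
# two derivatives along axis 0 followed by one along axis 1.
from pytools import flatten

assert list(flatten([axis] * n for axis, n in ((0, 2), (1, 1)))) == [0, 0, 1]
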
def scalar_loop_args(self):
    from loopy.kernel.data import ValueArg

    if self.args is None:
        return []
    else:
        from pytools import flatten
        loop_arg_names = list(flatten(dom.get_var_names(dim_type.param)
            for dom in self.domains))
        return [arg.name for arg in self.args if isinstance(arg, ValueArg)
                if arg.name in loop_arg_names]

def flatten(self, ary):
    # Return a flat version of *ary*. The returned value is suitable for
    # use with solvers whose API expects a one-dimensional array.
    if not self._operator_uses_obj_array:
        ary = [ary]

    from arraycontext import flatten
    result = self.array_context.empty(self.total_dofs, self.dtype)
    for res_i, (start, end) in zip(ary, self.starts_and_ends):
        result[start:end] = flatten(res_i, self.array_context)

    return result

def second_fundamental_form(
        actx: ArrayContext, dcoll: DiscretizationCollection, dd=None
        ) -> np.ndarray:
    r"""Computes the second fundamental form:

    .. math::

        S(x) = \begin{bmatrix}
            \partial_{uu} x\cdot n & \partial_{uv} x\cdot n \\
            \partial_{uv} x\cdot n & \partial_{vv} x\cdot n
        \end{bmatrix}

    where :math:`n` is the surface normal, :math:`x(u, v)` defines a
    parameterized surface, and :math:`u, v` are coordinates on the
    parameterized surface.

    :arg dd: a :class:`~grudge.dof_desc.DOFDesc`, or a value convertible to one.
    :returns: a rank-2 object array describing the second fundamental form.
    """
    if dd is None:
        dd = DD_VOLUME

    dim = dcoll.discr_from_dd(dd).dim
    normal = rel_mv_normal(actx, dcoll, dd=dd).as_vector(dtype=object)

    if dim == 1:
        second_ref_axes = [((0, 2),)]
    elif dim == 2:
        second_ref_axes = [((0, 2),), ((0, 1), (1, 1)), ((1, 2),)]
    else:
        raise ValueError("%dD surfaces not supported" % dim)

    from pytools import flatten

    form2 = np.empty((dim, dim), dtype=object)

    for ref_axes in second_ref_axes:
        i, j = flatten([rst_axis] * n for rst_axis, n in ref_axes)

        ruv = make_obj_array([
            forward_metric_nth_derivative(actx, dcoll, xyz_axis, ref_axes, dd=dd)
            for xyz_axis in range(dcoll.ambient_dim)])

        form2[i, j] = form2[j, i] = normal.dot(ruv)

    return form2

def get_dependencies(self, source_files):
    from codepy.tools import join_continued_lines

    result, stdout, stderr = call_capture_output(
            [self.cc]
            + ["-M"]
            + ["-D%s" % define for define in self.defines]
            + ["-U%s" % undefine for undefine in self.undefines]
            + ["-I%s" % idir for idir in self.include_dirs]
            + self.cflags
            + source_files)

    if result != 0:
        raise CompileError("getting dependencies failed: " + stderr)

    lines = join_continued_lines(stdout.split("\n"))

    from pytools import flatten
    return set(flatten(line.split()[2:] for line in lines))

def generate_icosahedron(
        r: float, order: int, *,
        node_vertex_consistency_tolerance: Optional[Union[float, bool]] = None,
        unit_nodes: Optional[np.ndarray] = None):
    # https://en.wikipedia.org/w/index.php?title=Icosahedron&oldid=387737307
    phi = (1 + 5**(1 / 2)) / 2

    from pytools import flatten
    vertices = np.array(sorted(flatten([
            (0, pm1 * 1, pm2 * phi),
            (pm1 * 1, pm2 * phi, 0),
            (pm1 * phi, 0, pm2 * 1)]
            for pm1 in [-1, 1]
            for pm2 in [-1, 1]))).T.copy()

    top_ring = [11, 7, 1, 2, 8]
    bottom_ring = [10, 9, 3, 0, 4]
    bottom_point = 6
    top_point = 5

    tris = []
    m = len(top_ring)
    for i in range(m):
        tris.append([top_ring[i], top_ring[(i + 1) % m], top_point])
        tris.append([bottom_ring[i], bottom_point, bottom_ring[(i + 1) % m]])
        tris.append([bottom_ring[i], bottom_ring[(i + 1) % m], top_ring[i]])
        tris.append([top_ring[i], bottom_ring[(i + 1) % m],
                     top_ring[(i + 1) % m]])

    vertices *= r / la.norm(vertices[:, 0])

    vertex_indices = np.array(tris, dtype=np.int32)

    grp = make_group_from_vertices(vertices, vertex_indices, order,
                                   unit_nodes=unit_nodes)

    from meshmode.mesh import Mesh
    return Mesh(
            vertices, [grp],
            node_vertex_consistency_tolerance=node_vertex_consistency_tolerance,
            is_conforming=True)

def get_dependencies(self):
    deps = set()

    from hedge.tools import setify_field as setify
    from hedge.optemplate import OperatorBinding, BoundaryPair
    for f in self.expressions:
        assert isinstance(f, OperatorBinding)
        if isinstance(f.field, BoundaryPair):
            deps |= setify(f.field.field) | setify(f.field.bfield)
        else:
            deps |= setify(f.field)

    dep_mapper = self.dep_mapper_factory()

    from pytools import flatten
    return set(flatten(dep_mapper(dep) for dep in deps))

def uninstall_from_make_output(text):
    lines = re.split(r'(\n|\&\&)', text)
    lines = [
        re.sub(r'(^\s+|\s+$)', '', line)
        for line in lines
        if re.search(r'\/usr\/bin\/install', line) is not None
    ]
    lines = [line for line in lines if re.search(r'\-d', line) is None]
    paths = flatten(map(install_paths, lines))
    for path in paths:
        print(path)
        print(os.path.exists(path))
        print(os.path.isfile(path))
        if os.path.isfile(path):
            os.unlink(path)

def to_array(self):
    single_tensor = ptm.single_tensor
    in_indices = list(reversed(range(self.no_qubits)))
    idx = [[i, 2 * self.no_qubits - i, 3 * self.no_qubits - i]
           for i in in_indices]
    transformation_tensors = list(
            zip([single_tensor] * self.no_qubits, idx))
    transformation_tensors = pytools.flatten(transformation_tensors)

    density_matrix = np.einsum(self.dm, in_indices,
            *transformation_tensors, optimize=True)
    density_matrix = density_matrix.reshape(
            (2**self.no_qubits, 2**self.no_qubits))
    return density_matrix

def get_dependencies(self, source_files):
    from codepy.tools import join_continued_lines

    from pytools.prefork import call_capture_output
    result, stdout, stderr = call_capture_output(
            [self.cc]
            + ["-M"]
            + ["-D%s" % define for define in self.defines]
            # note: undefine flags must come from self.undefines,
            # not self.defines
            + ["-U%s" % undefine for undefine in self.undefines]
            + ["-I%s" % idir for idir in self.include_dirs]
            + source_files)

    if result != 0:
        raise CompileError("getting dependencies failed: "+stderr)

    lines = join_continued_lines(stdout.split("\n"))
    from pytools import flatten
    return set(flatten(
        line.split()[2:] for line in lines))

def get_dependencies(self, source_files):
    from codepy.tools import join_continued_lines

    result, stdout, stderr = call_capture_output(
            [self.cc]
            + ["-M"]
            + [f"-D{define}" for define in self.defines]
            + [f"-U{undefine}" for undefine in self.undefines]
            + [f"-I{idir}" for idir in self.include_dirs]
            + self.cflags
            + source_files)

    if result != 0:
        raise CompileError(f"getting dependencies failed: {stderr}")

    lines = join_continued_lines(stdout.split("\n"))
    lines = [line for line in lines
             if not (line.strip() and line.strip()[0] == "#")]

    from pytools import flatten
    return set(flatten(
        line.split()[2:] for line in lines))

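# Assumed shape of the `cc -M` make-rule output that the split()[2:] above
# relies on (continuation lines already joined by join_continued_lines):
#     foo.o: foo.c foo.h bar/baz.h
# split()[0] is the object-file target and split()[1] the source file itself,
# so split()[2:] keeps just the headers the source depends on.
line = "foo.o: foo.c foo.h bar/baz.h"
assert line.split()[2:] == ["foo.h", "bar/baz.h"]
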
def volume_to_face_up_interpolation_matrix(self):
    """Generate a matrix that maps volume nodal values to
    a vector of face nodal values on the quadrature grid, with
    faces immediately concatenated, i.e.::

        [face 1 nodal data][face 2 nodal data]...
    """
    ldis = self.ldis

    face_maps = ldis.face_affine_maps()

    from pytools import flatten
    face_nodes = list(flatten(
        [face_map(qnode) for qnode in self.face_nodes]
        for face_map in face_maps))

    from hedge.polynomial import generic_vandermonde
    vdm = generic_vandermonde(face_nodes, list(ldis.basis_functions()))

    from hedge.tools.linalg import leftsolve
    return leftsolve(self.ldis.vandermonde(), vdm)

def face_affine_maps(self):
    """Return an affine map for each face that maps the
    (n-1)-dimensional face unit coordinates to their volume
    coordinates.
    """
    face_vertex_node_index_lists = \
            self.geometry.face_vertices(self.vertex_indices())

    from pytools import flatten, one
    vertex_node_indices = set(flatten(face_vertex_node_index_lists))

    def find_missing_node(face_vertex_node_indices):
        return unit_nodes[one(
            vertex_node_indices - set(face_vertex_node_indices))]

    unit_nodes = self.unit_nodes()
    sets_of_to_points = [
            [unit_nodes[fvni] for fvni in face_vertex_node_indices]
            + [find_missing_node(face_vertex_node_indices)]
            for face_vertex_node_indices in face_vertex_node_index_lists
            ]
    from_points = sets_of_to_points[0]

    # Construct an affine map that promotes face nodes into volume
    # by appending -1, this should end up on the first face
    dim = self.dimensions
    from hedge.tools.affine import AffineMap
    from hedge.tools.linalg import unit_vector
    to_face_1 = AffineMap(
            numpy.vstack([
                numpy.eye(dim - 1, dtype=numpy.float64),
                numpy.zeros(dim - 1)
                ]),
            -unit_vector(dim, dim - 1, dtype=numpy.float64))

    def finish_affine_map(amap):
        return amap.post_compose(to_face_1)

    from hedge.tools.affine import identify_affine_map
    return [
            finish_affine_map(identify_affine_map(from_points, to_points))
            for to_points in sets_of_to_points
            ]

def face_affine_maps(self):
    """Return an affine map for each face that maps the
    (n-1)-dimensional face unit coordinates to their volume
    coordinates.
    """
    face_vertex_node_index_lists = \
            self.geometry.face_vertices(self.vertex_indices())

    from pytools import flatten, one
    vertex_node_indices = set(flatten(face_vertex_node_index_lists))

    def find_missing_node(face_vertex_node_indices):
        return unit_nodes[one(
            vertex_node_indices - set(face_vertex_node_indices))]

    unit_nodes = self.unit_nodes()
    sets_of_to_points = [[unit_nodes[fvni]
            for fvni in face_vertex_node_indices]
            + [find_missing_node(face_vertex_node_indices)]
            for face_vertex_node_indices in face_vertex_node_index_lists]
    from_points = sets_of_to_points[0]

    # Construct an affine map that promotes face nodes into volume
    # by appending -1, this should end up on the first face
    dim = self.dimensions
    from hedge.tools.affine import AffineMap
    from hedge.tools.linalg import unit_vector
    to_face_1 = AffineMap(
            numpy.vstack([
                numpy.eye(dim-1, dtype=numpy.float64),
                numpy.zeros(dim-1)]),
            -unit_vector(dim, dim-1, dtype=numpy.float64))

    def finish_affine_map(amap):
        return amap.post_compose(to_face_1)

    from hedge.tools.affine import identify_affine_map
    return [
            finish_affine_map(
                identify_affine_map(from_points, to_points))
            for to_points in sets_of_to_points]

def volume_to_face_up_interpolation_matrix(self):
    """Generate a matrix that maps volume nodal values to
    a vector of face nodal values on the quadrature grid, with
    faces immediately concatenated, i.e.::

        [face 1 nodal data][face 2 nodal data]...
    """
    ldis = self.ldis

    face_maps = ldis.face_affine_maps()

    from pytools import flatten
    face_nodes = list(flatten([face_map(qnode) for qnode in self.face_nodes]
                              for face_map in face_maps))

    from hedge.polynomial import generic_vandermonde
    vdm = generic_vandermonde(face_nodes, list(ldis.basis_functions()))

    from hedge.tools.linalg import leftsolve
    return leftsolve(self.ldis.vandermonde(), vdm)

def get_next_step(self, available_names, done_insns):
    from pytools import all, argmax2
    available_insns = [
            (insn, insn.priority) for insn in self.instructions
            if insn not in done_insns
            and all(dep.name in available_names
                for dep in insn.get_dependencies())]

    if not available_insns:
        raise self.NoInstructionAvailable

    from pytools import flatten
    discardable_vars = set(available_names) - set(flatten(
        [dep.name for dep in insn.get_dependencies()]
        for insn in self.instructions
        if insn not in done_insns))

    # {{{ make sure results do not get discarded

    from hedge.tools import with_object_array_or_scalar

    from hedge.optemplate.mappers import DependencyMapper
    dm = DependencyMapper(composite_leaves=False)

    def remove_result_variable(result_expr):
        # The extra dependency mapper run is necessary
        # because, for instance, subscripts can make it
        # into the result expression, which then does
        # not consist of just variables.

        for var in dm(result_expr):
            from pymbolic.primitives import Variable
            assert isinstance(var, Variable)
            discardable_vars.discard(var.name)

    with_object_array_or_scalar(remove_result_variable, self.result)

    # }}}

    return argmax2(available_insns), discardable_vars

def second_fundamental_form(ambient_dim, dim=None, dd=None):
    if dim is None:
        dim = ambient_dim - 1

    normal = surface_normal(ambient_dim, dim=dim, dd=dd).as_vector()
    if dim == 1:
        second_ref_axes = [((0, 2),)]
    elif dim == 2:
        second_ref_axes = [((0, 2),), ((0, 1), (1, 1)), ((1, 2),)]
    else:
        raise ValueError("%dD surfaces not supported" % dim)

    from pytools import flatten
    form2 = np.empty((dim, dim), dtype=object)

    for ref_axes in second_ref_axes:
        i, j = flatten([rst_axis] * n for rst_axis, n in ref_axes)

        ruv = np.array([
            forward_metric_nth_derivative(xyz_axis, ref_axes, dd=dd)
            for xyz_axis in range(ambient_dim)])
        form2[i, j] = form2[j, i] = normal.dot(ruv)

    return cse(form2, "form2_mat", cse_scope.DISCRETIZATION)

def __init__(self, no_qubits, data=None):
    if no_qubits > 15:
        raise ValueError(
            "no_qubits=%d is way too many qubits, are you sure?"
            % no_qubits)

    self.no_qubits = no_qubits
    self.shape = [4] * no_qubits

    if isinstance(data, np.ndarray):
        single_tensor = ptm.single_tensor
        assert data.size == 4**self.no_qubits
        data = data.reshape((2, 2) * self.no_qubits)
        in_indices = list(
                reversed(range(self.no_qubits, 3 * self.no_qubits)))
        contraction_indices = [
                (i, i + self.no_qubits, i + 2 * self.no_qubits)
                for i in range(self.no_qubits)]
        out_indices = list(reversed(range(self.no_qubits)))
        transformation_tensors = list(
                zip([single_tensor] * self.no_qubits, contraction_indices))
        transformation_tensors = pytools.flatten(transformation_tensors)
        self.dm = np.einsum(data, in_indices, *transformation_tensors,
                            out_indices, optimize=True).real
    elif data is None:
        self.dm = np.zeros(self.shape)
        self.dm[tuple([0] * self.no_qubits)] = 1
    else:
        raise ValueError("type of data not understood")

def generate_icosahedron(r, order):
    # http://en.wikipedia.org/w/index.php?title=Icosahedron&oldid=387737307
    phi = (1+5**(1/2))/2

    from pytools import flatten
    vertices = np.array(sorted(flatten([
            (0, pm1*1, pm2*phi),
            (pm1*1, pm2*phi, 0),
            (pm1*phi, 0, pm2*1)]
            for pm1 in [-1, 1]
            for pm2 in [-1, 1]))).T.copy()

    top_ring = [11, 7, 1, 2, 8]
    bottom_ring = [10, 9, 3, 0, 4]
    bottom_point = 6
    top_point = 5

    tris = []
    l = len(top_ring)
    for i in range(l):
        tris.append([top_ring[i], top_ring[(i+1) % l], top_point])
        tris.append([bottom_ring[i], bottom_point, bottom_ring[(i+1) % l]])
        tris.append([bottom_ring[i], bottom_ring[(i+1) % l], top_ring[i]])
        tris.append([top_ring[i], bottom_ring[(i+1) % l], top_ring[(i+1) % l]])

    vertices *= r/la.norm(vertices[:, 0])

    vertex_indices = np.array(tris, dtype=np.int32)

    grp = make_group_from_vertices(vertices, vertex_indices, order)

    from meshmode.mesh import Mesh
    return Mesh(
            vertices, [grp],
            nodal_adjacency=None,
            facial_adjacency_groups=None)

def _compute_nodal_adjacency_from_vertices(mesh):
    # FIXME Native code would make this faster
    _, nvertices = mesh.vertices.shape
    vertex_to_element = [[] for i in range(nvertices)]

    for grp in mesh.groups:
        iel_base = grp.element_nr_base
        for iel_grp in range(grp.nelements):
            for ivertex in grp.vertex_indices[iel_grp]:
                vertex_to_element[ivertex].append(iel_base + iel_grp)

    element_to_element = [set() for i in range(mesh.nelements)]
    for grp in mesh.groups:
        iel_base = grp.element_nr_base
        for iel_grp in range(grp.nelements):
            for ivertex in grp.vertex_indices[iel_grp]:
                element_to_element[iel_base + iel_grp].update(
                        vertex_to_element[ivertex])

    for iel, neighbors in enumerate(element_to_element):
        neighbors.remove(iel)

    lengths = [len(el_list) for el_list in element_to_element]
    neighbors_starts = np.cumsum(
            np.array([0] + lengths, dtype=mesh.element_id_dtype))

    from pytools import flatten
    neighbors = np.array(
            list(flatten(element_to_element)),
            dtype=mesh.element_id_dtype)

    assert neighbors_starts[-1] == len(neighbors)

    return NodalAdjacency(
            neighbors_starts=neighbors_starts,
            neighbors=neighbors)

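# Sketch (with made-up numbers) of the CSR-style layout built above: the
# neighbors of element i occupy neighbors[starts[i]:starts[i+1]].
import numpy as np

element_to_element = [{1}, {0, 2}, {1}]
lengths = [len(s) for s in element_to_element]
starts = np.cumsum([0] + lengths)                    # -> [0, 1, 3, 4]
flat = np.array([el for s in element_to_element for el in s])
assert starts[-1] == len(flat)
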
def generate_nodal_adjacency(self, nelements, nvertices, groups):
    # medium-term FIXME: make this an incremental update
    # rather than build-from-scratch
    vertex_to_element = [[] for i in range(nvertices)]

    element_index = 0
    for grp in groups:
        for iel_grp in range(len(grp)):
            for ivertex in grp[iel_grp]:
                vertex_to_element[ivertex].append(element_index)
            element_index += 1

    element_to_element = [set() for i in range(nelements)]
    element_index = 0
    if self.lazy:
        for grp in groups:
            for iel_grp in range(len(grp)):
                for i in range(len(grp[iel_grp])):
                    for j in range(i+1, len(grp[iel_grp])):
                        vertex_pair = (
                                min(grp[iel_grp][i], grp[iel_grp][j]),
                                max(grp[iel_grp][i], grp[iel_grp][j]))
                        #print 'iel:', iel_grp, 'pair:', vertex_pair
                        if vertex_pair not in self.seen_tuple:
                            self.propagate_tree(
                                    self.get_root(self.pair_map[vertex_pair]),
                                    self.hanging_vertex_element,
                                    element_to_element)
                            #print self.pair_map[vertex_pair].left_vertex, \
                            #        self.pair_map[vertex_pair].right_vertex, \
                            #        self.pair_map[vertex_pair].adjacent_elements, \
                            #        self.hanging_vertex_element[self.pair_map[vertex_pair].left_vertex], \
                            #        self.hanging_vertex_element[self.pair_map[vertex_pair].right_vertex]
    else:
        for grp in groups:
            for iel_grp in range(len(grp)):
                for ivertex in grp[iel_grp]:
                    element_to_element[element_index].update(
                            vertex_to_element[ivertex])
                    if self.hanging_vertex_element[ivertex]:
                        for hanging_element in \
                                self.hanging_vertex_element[ivertex]:
                            if element_index != hanging_element:
                                element_to_element[element_index].update(
                                        [hanging_element])
                                element_to_element[hanging_element].update(
                                        [element_index])
                for i in range(len(grp[iel_grp])):
                    for j in range(i+1, len(grp[iel_grp])):
                        vertex_pair = (
                                min(grp[iel_grp][i], grp[iel_grp][j]),
                                max(grp[iel_grp][i], grp[iel_grp][j]))
                        #element_to_element[element_index].update(
                                #self.pair_map[vertex_pair].adjacent_elements)
                        queue = [self.pair_map[vertex_pair]]
                        while queue:
                            vertex = queue.pop(0)
                            #if leaf node
                            if vertex.left is None and vertex.right is None:
                                assert(element_index in vertex.adjacent_elements)
                                element_to_element[element_index].update(
                                        vertex.adjacent_elements)
                            else:
                                queue.append(vertex.left)
                                queue.append(vertex.right)
                '''
                if self.hanging_vertex_element[ivertex] and element_index != self.hanging_vertex_element[ivertex][0]:
                    element_to_element[element_index].update([self.hanging_vertex_element[ivertex][0]])
                    element_to_element[self.hanging_vertex_element[ivertex][0]].update([element_index])
                '''
                element_index += 1

    logger.debug("number of new elements: %d" % len(element_to_element))

    for iel, neighbors in enumerate(element_to_element):
        if iel in neighbors:
            neighbors.remove(iel)
    #print(self.ray_elements)
    '''
    for ray in self.rays:
        curnode = ray.first
        while curnode is not None:
            if len(curnode.value.elements) >= 2:
                if curnode.value.elements[0] is not None:
                    element_to_element[curnode.value.elements[0]].update(curnode.value.elements)
                if curnode.value.elements[1] is not None:
                    element_to_element[curnode.value.elements[1]].update(curnode.value.elements)
            if len(curnode.value.velements) >= 2:
                if curnode.value.velements[0] is not None:
                    element_to_element[curnode.value.velements[0]].update(curnode.value.velements)
                if curnode.value.velements[1] is not None:
                    element_to_element[curnode.value.velements[1]].update(curnode.value.velements)
            curnode = curnode.next
    '''
    '''
    for i in self.ray_elements:
        for j in i:
            #print j[0], j[1]
            element_to_element[j[0]].update(j)
            element_to_element[j[1]].update(j)
    '''
    #print element_to_element
    lengths = [len(el_list) for el_list in element_to_element]
    neighbors_starts = np.cumsum(
            np.array([0] + lengths, dtype=self.last_mesh.element_id_dtype),
            # cumsum silently widens integer types
            dtype=self.last_mesh.element_id_dtype)

    from pytools import flatten
    neighbors = np.array(
            list(flatten(element_to_element)),
            dtype=self.last_mesh.element_id_dtype)

    assert neighbors_starts[-1] == len(neighbors)

    from meshmode.mesh import NodalAdjacency
    return NodalAdjacency(
            neighbors_starts=neighbors_starts,
            neighbors=neighbors)

def combine(self, values):
    from pytools import flatten
    return set(flatten(values))

def get_boundary_flux_mod(fluxes, fvi, discr, dtype):
    from cgen import \
            FunctionDeclaration, FunctionBody, Typedef, Struct, \
            Const, Reference, Value, POD, MaybeUnused, \
            Statement, Include, Line, Block, Initializer, Assign, \
            CustomLoop, For

    from pytools import to_uncomplex_dtype, flatten

    from codepy.bpl import BoostPythonModule
    mod = BoostPythonModule()

    mod.add_to_preamble([
        Include("cstdlib"),
        Include("algorithm"),
        Line(),
        Include("boost/foreach.hpp"),
        Line(),
        Include("hedge/face_operators.hpp"),
        ])

    S = Statement
    mod.add_to_module([
        S("using namespace hedge"),
        S("using namespace pyublas"),
        Line(),
        Typedef(POD(dtype, "value_type")),
        Typedef(POD(to_uncomplex_dtype(dtype), "uncomplex_type")),
        ])

    arg_struct = Struct("arg_struct", [
        Value("numpy_array<value_type>", "flux%d_on_faces" % i)
        for i in range(len(fluxes))
        ]+[
        Value("numpy_array<value_type>", arg_name)
        for arg_name in fvi.arg_names
        ])

    mod.add_struct(arg_struct, "ArgStruct")
    mod.add_to_module([Line()])

    fdecl = FunctionDeclaration(
            Value("void", "gather_flux"),
            [
                Const(Reference(Value(
                    "face_group<face_pair<straight_face> >", "fg"))),
                Reference(Value("arg_struct", "args"))
                ])

    from pymbolic.mapper.stringifier import PREC_PRODUCT

    def gen_flux_code():
        f2cm = FluxToCodeMapper()

        result = [
                Assign("fof%d_it[loc_fof_base+i]" % flux_idx,
                    "uncomplex_type(fp.int_side.face_jacobian) * "
                    + flux_to_code(f2cm, False, flux_idx, fvi, flux.op.flux,
                        PREC_PRODUCT))
                for flux_idx, flux in enumerate(fluxes)
                ]

        return [
                Initializer(Value("value_type", cse_name), cse_str)
                for cse_name, cse_str in f2cm.cse_name_list] + result

    fbody = Block([
        Initializer(
            Const(Value("numpy_array<value_type>::iterator", "fof%d_it" % i)),
            "args.flux%d_on_faces.begin()" % i)
        for i in range(len(fluxes))
        ]+[
        Initializer(
            Const(Value("numpy_array<value_type>::const_iterator",
                "%s_it" % arg_name)),
            "args.%s.begin()" % arg_name)
        for arg_name in fvi.arg_names
        ]+[
        Line(),
        CustomLoop(
            "BOOST_FOREACH(const face_pair<straight_face> &fp, fg.face_pairs)",
            Block(
                list(flatten([
                    Initializer(Value("node_number_t", "%s_ebi" % where),
                        "fp.%s.el_base_index" % where),
                    Initializer(Value("index_lists_t::const_iterator",
                        "%s_idx_list" % where),
                        "fg.index_list(fp.%s.face_index_list_number)" % where),
                    Line(),
                    ]
                    for where in ["int_side", "ext_side"]
                    ))+[
                Line(),
                Initializer(Value("node_number_t", "loc_fof_base"),
                    "fg.face_length()*(fp.%(where)s.local_el_number*fg.face_count"
                    " + fp.%(where)s.face_id)" % {"where": "int_side"}),
                Line(),
                For(
                    "unsigned i = 0",
                    "i < fg.face_length()",
                    "++i",
                    Block(
                        [
                            Initializer(MaybeUnused(
                                Value("node_number_t", "%s_idx" % where)),
                                "%(where)s_ebi + %(where)s_idx_list[i]"
                                % {"where": where})
                            for where in ["int_side", "ext_side"]
                            ]+gen_flux_code()
                        )
                    )
                ]))
        ])

    mod.add_function(FunctionBody(fdecl, fbody))

    #print "----------------------------------------------------------------"
    #print mod.generate()
    #raw_input("[Enter]")

    return mod.compile(get_flux_toolchain(discr, fluxes))

def _setup_neighbor_connections(self):
    comm = self.context.communicator

    # Why is this barrier needed? Some of our ranks may arrive at this
    # point early and start sending packets to ranks that are still stuck
    # in previous wildcard-recv loops. These receivers will then be very
    # confused by packets they didn't expect, and, once they reach their
    # recv bit in *this* subroutine, will wait for packets that will never
    # arrive. This same argument does not apply to other recv()s in this
    # file because they are targeted and thus benefit from MPI's
    # non-overtaking rule.
    #
    # Parallel programming is fun.
    comm.Barrier()

    if self.neighbor_ranks:
        # send interface information to neighboring ranks -----------------
        from pytools import reverse_dictionary
        local2global_vertex_indices = \
                reverse_dictionary(self.global2local_vertex_indices)

        send_requests = []

        for rank in self.neighbor_ranks:
            bdry_tag = hedge.mesh.TAG_RANK_BOUNDARY(rank)
            rank_bdry = self.subdiscr.mesh.tag_to_boundary[bdry_tag]
            rank_discr_boundary = self.subdiscr.get_boundary(bdry_tag)

            # a list of global vertex numbers for each face
            my_vertices_global = [
                    tuple(local2global_vertex_indices[vi]
                        for vi in el.faces[face_nr])
                    for el, face_nr in rank_bdry]

            # a list of node coordinates, indicating the order
            # in which nodal values will be sent, this is for
            # testing only and could (potentially) be omitted
            my_node_coords = []
            for el, face_nr in rank_bdry:
                eslice, ldis = self.subdiscr.find_el_data(el.id)
                findices = ldis.face_indices()[face_nr]

                my_node_coords.append(
                        [self.nodes[eslice.start+i] for i in findices])

            # compile a list of FluxFace.h values for unification
            # across the rank boundary
            my_h_values = [rank_discr_boundary.find_facepair_side(el_face).h
                    for el_face in rank_bdry]

            packet = (my_vertices_global, my_node_coords, my_h_values)

            send_requests.append(comm.isend(packet, dest=rank, tag=0))

        received_packets = {}
        while len(received_packets) < len(self.neighbor_ranks):
            status = mpi.Status()
            received_packet = comm.recv(tag=0, source=mpi.ANY_SOURCE,
                    status=status)
            received_packets[status.source] = received_packet

        mpi.Request.Waitall(send_requests)

        # process received packets ----------------------------------------
        from pytools import flatten

        # nb_ stands for neighbor_

        self.from_neighbor_maps = {}

        for rank, (nb_all_facevertices_global, nb_node_coords,
                nb_h_values) in received_packets.iteritems():
            bdry_tag = hedge.mesh.TAG_RANK_BOUNDARY(rank)
            rank_bdry = self.subdiscr.mesh.tag_to_boundary[bdry_tag]
            rank_discr_boundary = self.subdiscr.get_boundary(bdry_tag)

            flat_nb_node_coords = list(flatten(nb_node_coords))

            # step 1: find start node indices for each
            # of the neighbor's elements
            nb_face_starts = [0]
            for node_coords in nb_node_coords[:-1]:
                nb_face_starts.append(
                        nb_face_starts[-1]+len(node_coords))

            # step 2: match faces by matching vertices
            nb_face_order = dict(
                    (frozenset(vertices), i)
                    for i, vertices in enumerate(nb_all_facevertices_global))

            # step 3: make a list of indices into the data we
            # receive from our neighbor that'll tell us how
            # to reshuffle them to match our node order
            from_indices = []

            shuffled_indices_cache = {}

            def get_shuffled_indices(face_node_count, shuffle_op):
                try:
                    return shuffled_indices_cache[shuffle_op]
                except KeyError:
                    unshuffled_indices = range(face_node_count)
                    result = shuffled_indices_cache[shuffle_op] = \
                            shuffle_op(unshuffled_indices)
                    return result

            for el, face_nr in rank_bdry:
                eslice, ldis = self.subdiscr.find_el_data(el.id)

                my_vertices = el.faces[face_nr]
                my_global_vertices = tuple(local2global_vertex_indices[vi]
                        for vi in my_vertices)

                face_node_count = ldis.face_node_count()
                try:
                    nb_face_idx = nb_face_order[frozenset(my_global_vertices)]
                    # continue below in else part
                except KeyError:
                    # this happens if my_global_vertices is not a permutation
                    # of the neighbor's face vertices. Periodicity is the only
                    # reason why that would be so.
                    my_vertices_there, axis = \
                            self.global_periodic_opposite_faces[
                                    my_global_vertices]
                    nb_face_idx = nb_face_order[frozenset(my_vertices_there)]

                    his_vertices_here, axis2 = \
                            self.global_periodic_opposite_faces[
                                    nb_all_facevertices_global[nb_face_idx]]

                    assert axis == axis2

                    nb_face_start = nb_face_starts[nb_face_idx]

                    shuffle_op = \
                            ldis.get_face_index_shuffle_to_match(
                                    my_global_vertices, his_vertices_here)

                    shuffled_other_node_indices = [nb_face_start+i
                            for i in get_shuffled_indices(
                                face_node_count, shuffle_op)]

                    from_indices.extend(shuffled_other_node_indices)

                    # check if the nodes really match up
                    if "parallel_setup" in self.debug:
                        my_node_indices = [eslice.start+i
                                for i in ldis.face_indices()[face_nr]]

                        for my_i, other_i in zip(my_node_indices,
                                shuffled_other_node_indices):
                            dist = self.nodes[my_i]-flat_nb_node_coords[other_i]
                            dist[axis] = 0
                            assert la.norm(dist) < 1e-14
                else:
                    # continue handling of nonperiodic case
                    nb_global_vertices = nb_all_facevertices_global[nb_face_idx]

                    nb_face_start = nb_face_starts[nb_face_idx]

                    shuffle_op = \
                            ldis.get_face_index_shuffle_to_match(
                                    my_global_vertices, nb_global_vertices)

                    shuffled_other_node_indices = [nb_face_start+i
                            for i in get_shuffled_indices(
                                face_node_count, shuffle_op)]

                    from_indices.extend(shuffled_other_node_indices)

                    # check if the nodes really match up
                    if "parallel_setup" in self.debug:
                        my_node_indices = [eslice.start+i
                                for i in ldis.face_indices()[face_nr]]

                        for my_i, other_i in zip(my_node_indices,
                                shuffled_other_node_indices):
                            dist = self.nodes[my_i]-flat_nb_node_coords[other_i]
                            assert la.norm(dist) < 1e-14

                # finally, unify FluxFace.h values across boundary
                nb_h = nb_h_values[nb_face_idx]
                flux_face = rank_discr_boundary.find_facepair_side(
                        (el, face_nr))
                flux_face.h = max(nb_h, flux_face.h)

            if "parallel_setup" in self.debug:
                assert len(from_indices) == len(flat_nb_node_coords)

            # construct from_neighbor_map
            self.from_neighbor_maps[rank] = \
                    self.subdiscr.prepare_from_neighbor_map(from_indices)

def make_method_matrix(stepper, rhss, f_size, s_size):
    from pymbolic import var

    from pytools.obj_array import make_obj_array
    f = make_obj_array([var("f%d" % i) for i in range(f_size)])
    s = make_obj_array([var("s%d" % i) for i in range(s_size)])

    from hedge.timestep.multirate_ab.methods import (
            HIST_F2F, HIST_S2F, HIST_F2S, HIST_S2S, HIST_NAMES)
    hist_sizes = {
            HIST_F2F: f_size,
            HIST_S2F: f_size,
            HIST_S2S: s_size,
            HIST_F2S: s_size
            }

    orig_histories = {}
    for hn in HIST_NAMES:
        my_size = hist_sizes[hn]
        my_length = stepper.orders[hn]
        hist_name_str = hn.__name__[5:].lower()

        # each history entry has my_size components (f_size or s_size,
        # depending on the history)
        hist = [
                make_obj_array([
                    var("h_%s_%d_%d" % (hist_name_str, age, i))
                    for i in range(my_size)])
                for age in range(my_length)]

        stepper.histories[hn] = hist
        orig_histories[hn] = hist[:]

    stepper.startup_stepper = None
    del stepper.startup_history

    f_post_step, s_post_step = stepper([f, s], 0, rhss)

    def matrix_from_expressions(row_exprs, column_exprs):
        row_exprs = [fold_constants(expand(expr)) for expr in row_exprs]

        result = numpy.zeros((len(row_exprs), len(column_exprs)),
                dtype=object)
        for i, row_expr in enumerate(row_exprs):
            for j, col_expr in enumerate(column_exprs):
                result[i, j] = differentiate(row_expr, col_expr)

        return result

    from pytools import flatten
    row_exprs = list(flatten([
        f_post_step,
        s_post_step,
        ] + list(flatten([stepper.histories[hn] for hn in HIST_NAMES]))))
    column_exprs = list(flatten(
        [f, s] + list(flatten([orig_histories[hn] for hn in HIST_NAMES]))))

    return matrix_from_expressions(row_exprs, column_exprs), column_exprs

def partition_mesh(mesh, partition, part_bdry_tag_factory):
    """*partition* is a mapping that maps element id to
    integers that represent different pieces of the mesh.

    For historical reasons, the values in partition are called
    'parts'.
    """
    # Find parts to which we need to distribute.
    all_parts = list(set(partition[el.id] for el in mesh.elements))

    # Prepare a mapping of elements to tags to speed up
    # copy_el_tagger, below.
    el2tags = {}
    for tag, elements in mesh.tag_to_elements.iteritems():
        if tag == hedge.mesh.TAG_ALL:
            continue
        for el in elements:
            el2tags.setdefault(el, []).append(tag)

    # prepare a mapping of (el, face_nr) to boundary_tags
    # to speed up partition_bdry_tagger, below
    elface2tags = {}
    for tag, elfaces in mesh.tag_to_boundary.iteritems():
        if tag == hedge.mesh.TAG_ALL:
            continue
        for el, fn in elfaces:
            elface2tags.setdefault((el, fn), []).append(tag)

    # prepare a mapping from (el, face_nr) to the part
    # at the other end of the interface, if different from
    # current. concurrently, prepare a mapping
    #  part -> set([parts that border me])
    elface2part = {}
    neighboring_parts = {}

    for elface1, elface2 in mesh.interfaces:
        e1, f1 = elface1
        e2, f2 = elface2
        r1 = partition[e1.id]
        r2 = partition[e2.id]

        if r1 != r2:
            neighboring_parts.setdefault(r1, set()).add(r2)
            neighboring_parts.setdefault(r2, set()).add(r1)

            elface2part[elface1] = r2
            elface2part[elface2] = r1

    # prepare a new mesh for each part and send it
    from hedge.mesh import TAG_NO_BOUNDARY

    for part in all_parts:
        part_global_elements = [
                el for el in mesh.elements
                if partition[el.id] == part
                ]

        # pick out this part's vertices
        from pytools import flatten
        part_global_vertex_indices = set(
                flatten(el.vertex_indices for el in part_global_elements))

        part_local_vertices = [
                mesh.points[vi] for vi in part_global_vertex_indices
                ]

        # find global-to-local maps
        part_global2local_vertex_indices = dict(
                (gvi, lvi)
                for lvi, gvi in enumerate(part_global_vertex_indices))

        part_global2local_elements = dict(
                (el.id, i) for i, el in enumerate(part_global_elements))

        # find elements in local numbering
        part_local_elements = [[
                part_global2local_vertex_indices[vi]
                for vi in el.vertex_indices
                ] for el in part_global_elements]

        # make new local Mesh object, including
        # boundary and element tagging
        def partition_bdry_tagger(fvi, local_el, fn, all_vertices):
            el = part_global_elements[local_el.id]

            result = elface2tags.get((el, fn), [])
            try:
                opp_part = elface2part[el, fn]
                result.append(part_bdry_tag_factory(opp_part))

                # keeps this part of the boundary from falling
                # under TAG_ALL.
                result.append(TAG_NO_BOUNDARY)
            except KeyError:
                pass

            return result

        def copy_el_tagger(local_el, all_vertices):
            return el2tags.get(part_global_elements[local_el.id], [])

        def is_partbdry_face((local_el, face_nr)):
            return (part_global_elements[local_el.id], face_nr) in elface2part

        from hedge.mesh import make_conformal_mesh
        part_mesh = make_conformal_mesh(part_local_vertices,
                part_local_elements,
                partition_bdry_tagger, copy_el_tagger,
                mesh.periodicity,
                is_partbdry_face)

        # assemble per-part data
        my_nb_parts = neighboring_parts.get(part, [])
        yield PartitionData(part,
                part_mesh,
                part_global2local_elements,
                part_global2local_vertex_indices,
                my_nb_parts,
                mesh.periodic_opposite_faces,
                part_boundary_tags=dict(
                    (nb_part, part_bdry_tag_factory(nb_part))
                    for nb_part in my_nb_parts),
                tag_to_elements=part_mesh.tag_to_elements)

def partition_mesh(mesh, partition, part_bdry_tag_factory):
    """*partition* is a mapping that maps element id to
    integers that represent different pieces of the mesh.

    For historical reasons, the values in partition are called
    'parts'.
    """
    # Find parts to which we need to distribute.
    all_parts = list(set(
        partition[el.id] for el in mesh.elements))

    # Prepare a mapping of elements to tags to speed up
    # copy_el_tagger, below.
    el2tags = {}
    for tag, elements in mesh.tag_to_elements.iteritems():
        if tag == hedge.mesh.TAG_ALL:
            continue
        for el in elements:
            el2tags.setdefault(el, []).append(tag)

    # prepare a mapping of (el, face_nr) to boundary_tags
    # to speed up partition_bdry_tagger, below
    elface2tags = {}
    for tag, elfaces in mesh.tag_to_boundary.iteritems():
        if tag == hedge.mesh.TAG_ALL:
            continue
        for el, fn in elfaces:
            elface2tags.setdefault((el, fn), []).append(tag)

    # prepare a mapping from (el, face_nr) to the part
    # at the other end of the interface, if different from
    # current. concurrently, prepare a mapping
    #  part -> set([parts that border me])
    elface2part = {}
    neighboring_parts = {}

    for elface1, elface2 in mesh.interfaces:
        e1, f1 = elface1
        e2, f2 = elface2
        r1 = partition[e1.id]
        r2 = partition[e2.id]

        if r1 != r2:
            neighboring_parts.setdefault(r1, set()).add(r2)
            neighboring_parts.setdefault(r2, set()).add(r1)

            elface2part[elface1] = r2
            elface2part[elface2] = r1

    # prepare a new mesh for each part and send it
    from hedge.mesh import TAG_NO_BOUNDARY

    for part in all_parts:
        part_global_elements = [el for el in mesh.elements
                if partition[el.id] == part]

        # pick out this part's vertices
        from pytools import flatten
        part_global_vertex_indices = set(flatten(
                el.vertex_indices for el in part_global_elements))

        part_local_vertices = [mesh.points[vi]
                for vi in part_global_vertex_indices]

        # find global-to-local maps
        part_global2local_vertex_indices = dict(
                (gvi, lvi) for lvi, gvi in
                enumerate(part_global_vertex_indices))

        part_global2local_elements = dict(
                (el.id, i) for i, el in
                enumerate(part_global_elements))

        # find elements in local numbering
        part_local_elements = [
                [part_global2local_vertex_indices[vi]
                    for vi in el.vertex_indices]
                for el in part_global_elements]

        # make new local Mesh object, including
        # boundary and element tagging
        def partition_bdry_tagger(fvi, local_el, fn, all_vertices):
            el = part_global_elements[local_el.id]

            result = elface2tags.get((el, fn), [])
            try:
                opp_part = elface2part[el, fn]
                result.append(part_bdry_tag_factory(opp_part))

                # keeps this part of the boundary from falling
                # under TAG_ALL.
                result.append(TAG_NO_BOUNDARY)
            except KeyError:
                pass

            return result

        def copy_el_tagger(local_el, all_vertices):
            return el2tags.get(part_global_elements[local_el.id], [])

        def is_partbdry_face((local_el, face_nr)):
            return (part_global_elements[local_el.id], face_nr) in elface2part

        from hedge.mesh import make_conformal_mesh
        part_mesh = make_conformal_mesh(
                part_local_vertices,
                part_local_elements,
                partition_bdry_tagger, copy_el_tagger,
                mesh.periodicity,
                is_partbdry_face)

        # assemble per-part data

        my_nb_parts = neighboring_parts.get(part, [])
        yield PartitionData(
                part,
                part_mesh,
                part_global2local_elements,
                part_global2local_vertex_indices,
                my_nb_parts,
                mesh.periodic_opposite_faces,
                part_boundary_tags=dict(
                    (nb_part, part_bdry_tag_factory(nb_part))
                    for nb_part in my_nb_parts),
                tag_to_elements=part_mesh.tag_to_elements)

def diagonal(self, *, get_data=True, target_array=None, flatten=True):
    """Obtain the diagonal of the density matrix.

    Parameters
    ----------
    target_array : None or pycuda.gpuarray.array
        An already-allocated GPU array to which the data will be copied.
        If `None`, make a new GPU array.
    get_data : boolean
        Whether the data should be copied from the GPU.
    flatten : boolean
        If `True`, return the diagonal as a flat array; otherwise,
        reshape it to the diagonal's per-qubit shape.
    """
    diag_bases = [pb.computational_subbasis() for pb in self.bases]
    diag_shape = [db.dim_pauli for db in diag_bases]
    diag_size = pytools.product(diag_shape)

    if target_array is None:
        if self._work_data.gpudata.size < diag_size * 8:
            self._work_data.gpudata.free()
            self._work_data = ga.empty(diag_shape, np.float64)
            self._work_data.gpudata.size = self._work_data.nbytes
        target_array = self._work_data
    else:
        if target_array.size < diag_size:
            raise ValueError(
                "Size of `target_gpu_array` is too small ({}).\n"
                "Should be at least {}."
                .format(target_array.size, diag_size))

    idx = [[pb.computational_basis_indices[i]
            for i in range(pb.dim_hilbert)
            if pb.computational_basis_indices[i] is not None]
           for pb in self.bases]

    idx_j = np.array(list(pytools.flatten(idx))).astype(np.uint32)
    idx_i = np.cumsum([0] + [len(i) for i in idx][:-1]).astype(np.uint32)

    xshape = np.array(self._data.shape, np.uint32)
    yshape = np.array(diag_shape, np.uint32)

    xshape_gpu = self._cached_gpuarray(xshape)
    yshape_gpu = self._cached_gpuarray(yshape)

    idx_i_gpu = self._cached_gpuarray(idx_i)
    idx_j_gpu = self._cached_gpuarray(idx_j)

    block = (2**8, 1, 1)
    grid = (max(1, (diag_size - 1) // 2**8 + 1), 1, 1)

    if len(yshape) == 0:
        # brain-dead case, but should be handled according to exp.
        target_array.set(self._data.get())
    else:
        _multitake.prepared_call(
                grid, block,
                self._data.gpudata, target_array.gpudata,
                idx_i_gpu.gpudata, idx_j_gpu.gpudata,
                xshape_gpu.gpudata, yshape_gpu.gpudata,
                np.uint32(len(yshape)))

    if get_data:
        if flatten:
            return target_array.get().ravel()[:diag_size]
        else:
            return (target_array.get().ravel()[:diag_size]
                    .reshape(diag_shape))
    else:
        return ga.GPUArray(shape=diag_shape,
                           gpudata=target_array.gpudata,
                           dtype=np.float64)

def make_boundary_restriction(queue, discr, group_factory):
    """
    :return: a tuple ``(bdry_mesh, bdry_discr, connection)``
    """

    logger.info("building boundary connection: start")

    # {{{ build face_map

    # maps (igrp, el_grp, face_id) to a frozenset of vertex IDs
    face_map = {}

    for igrp, mgrp in enumerate(discr.mesh.groups):
        grp_face_vertex_indices = mgrp.face_vertex_indices()

        for iel_grp in range(mgrp.nelements):
            for fid, loc_face_vertices in enumerate(grp_face_vertex_indices):
                face_vertices = frozenset(
                        mgrp.vertex_indices[iel_grp, fvi]
                        for fvi in loc_face_vertices)
                face_map.setdefault(face_vertices, []).append(
                        (igrp, iel_grp, fid))
                del face_vertices

    # }}}

    boundary_faces = [
            face_ids[0]
            for face_vertices, face_ids in six.iteritems(face_map)
            if len(face_ids) == 1]

    from pytools import flatten
    bdry_vertex_vol_nrs = sorted(set(flatten(six.iterkeys(face_map))))

    vol_to_bdry_vertices = np.empty(
            discr.mesh.vertices.shape[-1],
            discr.mesh.vertices.dtype)
    vol_to_bdry_vertices.fill(-1)
    vol_to_bdry_vertices[bdry_vertex_vol_nrs] = np.arange(
            len(bdry_vertex_vol_nrs))

    bdry_vertices = discr.mesh.vertices[:, bdry_vertex_vol_nrs]

    from meshmode.mesh import Mesh, SimplexElementGroup
    bdry_mesh_groups = []
    connection_data = {}

    for igrp, grp in enumerate(discr.groups):
        mgrp = grp.mesh_el_group
        group_boundary_faces = [
                (ibface_el, ibface_face)
                for ibface_group, ibface_el, ibface_face in boundary_faces
                if ibface_group == igrp]

        if not isinstance(mgrp, SimplexElementGroup):
            raise NotImplementedError("can only take boundary of "
                    "SimplexElementGroup-based meshes")

        # {{{ preallocate arrays for mesh group

        ngroup_bdry_elements = len(group_boundary_faces)
        vertex_indices = np.empty(
                (ngroup_bdry_elements, mgrp.dim+1-1),
                mgrp.vertex_indices.dtype)

        bdry_unit_nodes = mp.warp_and_blend_nodes(mgrp.dim-1, mgrp.order)
        bdry_unit_nodes_01 = (bdry_unit_nodes + 1)*0.5

        vol_basis = mp.simplex_onb(mgrp.dim, mgrp.order)
        nbdry_unit_nodes = bdry_unit_nodes_01.shape[-1]
        nodes = np.empty(
                (discr.ambient_dim, ngroup_bdry_elements, nbdry_unit_nodes),
                dtype=np.float64)

        # }}}

        grp_face_vertex_indices = mgrp.face_vertex_indices()
        grp_vertex_unit_coordinates = mgrp.vertex_unit_coordinates()

        # batch by face_id

        batch_base = 0

        for face_id in range(len(grp_face_vertex_indices)):
            batch_boundary_el_numbers_in_grp = np.array(
                    [ibface_el
                        for ibface_el, ibface_face in group_boundary_faces
                        if ibface_face == face_id],
                    dtype=np.intp)

            new_el_numbers = np.arange(
                    batch_base,
                    batch_base + len(batch_boundary_el_numbers_in_grp))

            # {{{ no per-element axes in these computations

            # Find boundary vertex indices
            loc_face_vertices = list(grp_face_vertex_indices[face_id])

            # Find unit nodes for boundary element
            face_vertex_unit_coordinates = \
                    grp_vertex_unit_coordinates[loc_face_vertices]

            # Find A, b such that A [e_1 e_2] + b = [r_1 r_2]
            # (Notation assumes that the volume is 3D and the face is 2D.
            # Code does not.)

            b = face_vertex_unit_coordinates[0]
            A = (
                    face_vertex_unit_coordinates[1:]
                    - face_vertex_unit_coordinates[0]).T

            face_unit_nodes = (np.dot(A, bdry_unit_nodes_01).T + b).T

            resampling_mat = mp.resampling_matrix(
                    vol_basis,
                    face_unit_nodes, mgrp.unit_nodes)

            # }}}

            # {{{ build information for mesh element group

            # Find vertex_indices
            glob_face_vertices = mgrp.vertex_indices[
                    batch_boundary_el_numbers_in_grp][:, loc_face_vertices]
            vertex_indices[new_el_numbers] = \
                    vol_to_bdry_vertices[glob_face_vertices]

            # Find nodes
            nodes[:, new_el_numbers, :] = np.einsum(
                    "ij,dej->dei",
                    resampling_mat,
                    mgrp.nodes[:, batch_boundary_el_numbers_in_grp, :])

            # }}}

            connection_data[igrp, face_id] = _ConnectionBatchData(
                    group_source_element_indices=batch_boundary_el_numbers_in_grp,
                    group_target_element_indices=new_el_numbers,
                    A=A,
                    b=b,
                    )

            batch_base += len(batch_boundary_el_numbers_in_grp)

        bdry_mesh_group = SimplexElementGroup(
                mgrp.order, vertex_indices, nodes, unit_nodes=bdry_unit_nodes)
        bdry_mesh_groups.append(bdry_mesh_group)

    bdry_mesh = Mesh(bdry_vertices, bdry_mesh_groups)

    from meshmode.discretization import Discretization
    bdry_discr = Discretization(
            discr.cl_context, bdry_mesh, group_factory)

    connection = _build_boundary_connection(
            queue, discr, bdry_discr, connection_data)

    logger.info("building boundary connection: done")

    return bdry_mesh, bdry_discr, connection

def test_mesh_multiple_groups(actx_factory, ambient_dim, visualize=False):
    actx = actx_factory()

    order = 4

    mesh = mgen.generate_regular_rect_mesh(
            a=(-0.5,)*ambient_dim, b=(0.5,)*ambient_dim,
            nelements_per_axis=(8,)*ambient_dim, order=order)
    assert len(mesh.groups) == 1

    from meshmode.mesh.processing import split_mesh_groups
    element_flags = np.any(
            mesh.vertices[0, mesh.groups[0].vertex_indices] < 0.0,
            axis=1).astype(np.int64)
    mesh = split_mesh_groups(mesh, element_flags)

    assert len(mesh.groups) == 2            # pylint: disable=no-member
    assert mesh.facial_adjacency_groups
    assert mesh.nodal_adjacency

    if visualize and ambient_dim == 2:
        from meshmode.mesh.visualization import draw_2d_mesh
        draw_2d_mesh(mesh,
                draw_vertex_numbers=False,
                draw_element_numbers=True,
                draw_face_numbers=False,
                set_bounding_box=True)

        import matplotlib.pyplot as plt
        plt.savefig("test_mesh_multiple_groups_2d_elements.png", dpi=300)

    from meshmode.discretization import Discretization
    discr = Discretization(actx, mesh,
            PolynomialWarpAndBlendGroupFactory(order))

    if visualize:
        group_id = discr.empty(actx, dtype=np.int32)
        for igrp, vec in enumerate(group_id):
            vec.fill(igrp)

        from meshmode.discretization.visualization import make_visualizer
        vis = make_visualizer(actx, discr, vis_order=order)
        vis.write_vtk_file("mesh_multiple_groups.vtu",
                [("group_id", group_id)], overwrite=True)

    # check face restrictions
    from meshmode.discretization.connection import (
            make_face_restriction,
            make_face_to_all_faces_embedding,
            make_opposite_face_connection,
            check_connection)
    for boundary_tag in [BTAG_ALL, FACE_RESTR_INTERIOR, FACE_RESTR_ALL]:
        conn = make_face_restriction(actx, discr,
                group_factory=PolynomialWarpAndBlendGroupFactory(order),
                boundary_tag=boundary_tag,
                per_face_groups=False)
        check_connection(actx, conn)

        bdry_f = conn.to_discr.zeros(actx) + 1

        if boundary_tag == FACE_RESTR_INTERIOR:
            opposite = make_opposite_face_connection(actx, conn)
            check_connection(actx, opposite)

            op_bdry_f = opposite(bdry_f)
            error = flat_norm(bdry_f - op_bdry_f, np.inf)
            assert error < 1.0e-11, error

        if boundary_tag == FACE_RESTR_ALL:
            embedding = make_face_to_all_faces_embedding(
                    actx, conn, conn.to_discr)
            check_connection(actx, embedding)

            em_bdry_f = embedding(bdry_f)
            error = flat_norm(bdry_f - em_bdry_f)
            assert error < 1.0e-11, error

    # check some derivatives (nb: flatten is a generator)
    import pytools
    ref_axes = pytools.flatten([[i] for i in range(ambient_dim)])

    from meshmode.discretization import num_reference_derivative
    x = thaw(discr.nodes(), actx)
    num_reference_derivative(discr, ref_axes, x[0])

def _setup_neighbor_connections(self):
    comm = self.context.communicator

    # Why is this barrier needed? Some of our ranks may arrive at this
    # point early and start sending packets to ranks that are still stuck
    # in previous wildcard-recv loops. These receivers will then be very
    # confused by packets they didn't expect, and, once they reach their
    # recv bit in *this* subroutine, will wait for packets that will never
    # arrive. This same argument does not apply to other recv()s in this
    # file because they are targeted and thus benefit from MPI's
    # non-overtaking rule.
    #
    # Parallel programming is fun.
    comm.Barrier()

    if self.neighbor_ranks:
        # send interface information to neighboring ranks -----------------
        from pytools import reverse_dictionary
        local2global_vertex_indices = \
                reverse_dictionary(self.global2local_vertex_indices)

        send_requests = []

        for rank in self.neighbor_ranks:
            bdry_tag = hedge.mesh.TAG_RANK_BOUNDARY(rank)
            rank_bdry = self.subdiscr.mesh.tag_to_boundary[bdry_tag]
            rank_discr_boundary = self.subdiscr.get_boundary(bdry_tag)

            # a list of global vertex numbers for each face
            my_vertices_global = [
                    tuple(local2global_vertex_indices[vi]
                        for vi in el.faces[face_nr])
                    for el, face_nr in rank_bdry]

            # a list of node coordinates, indicating the order
            # in which nodal values will be sent, this is for
            # testing only and could (potentially) be omitted
            my_node_coords = []
            for el, face_nr in rank_bdry:
                eslice, ldis = self.subdiscr.find_el_data(el.id)
                findices = ldis.face_indices()[face_nr]

                my_node_coords.append(
                        [self.nodes[eslice.start+i] for i in findices])

            # compile a list of FluxFace.h values for unification
            # across the rank boundary
            my_h_values = [rank_discr_boundary.find_facepair_side(el_face).h
                    for el_face in rank_bdry]

            packet = (my_vertices_global, my_node_coords, my_h_values)

            send_requests.append(comm.isend(packet, dest=rank, tag=0))

        received_packets = {}
        while len(received_packets) < len(self.neighbor_ranks):
            status = mpi.Status()
            received_packet = comm.recv(tag=0, source=mpi.ANY_SOURCE,
                    status=status)
            received_packets[status.source] = received_packet

        mpi.Request.Waitall(send_requests)

        # process received packets ----------------------------------------
        from pytools import flatten

        # nb_ stands for neighbor_
        self.from_neighbor_maps = {}

        for rank, (nb_all_facevertices_global, nb_node_coords, nb_h_values) in \
                received_packets.iteritems():
            bdry_tag = hedge.mesh.TAG_RANK_BOUNDARY(rank)
            rank_bdry = self.subdiscr.mesh.tag_to_boundary[bdry_tag]
            rank_discr_boundary = self.subdiscr.get_boundary(bdry_tag)

            flat_nb_node_coords = list(flatten(nb_node_coords))

            # step 1: find start node indices for each
            # of the neighbor's elements
            nb_face_starts = [0]
            for node_coords in nb_node_coords[:-1]:
                nb_face_starts.append(
                        nb_face_starts[-1]+len(node_coords))

            # step 2: match faces by matching vertices
            nb_face_order = dict(
                    (frozenset(vertices), i)
                    for i, vertices in enumerate(nb_all_facevertices_global))

            # step 3: make a list of indices into the data we
            # receive from our neighbor that'll tell us how
            # to reshuffle them to match our node order
            from_indices = []

            shuffled_indices_cache = {}

            def get_shuffled_indices(face_node_count, shuffle_op):
                try:
                    return shuffled_indices_cache[shuffle_op]
                except KeyError:
                    unshuffled_indices = range(face_node_count)
                    result = shuffled_indices_cache[shuffle_op] = \
                            shuffle_op(unshuffled_indices)
                    return result

            for el, face_nr in rank_bdry:
                eslice, ldis = self.subdiscr.find_el_data(el.id)

                my_vertices = el.faces[face_nr]
                my_global_vertices = tuple(local2global_vertex_indices[vi]
                        for vi in my_vertices)

                face_node_count = ldis.face_node_count()
                try:
                    nb_face_idx = nb_face_order[frozenset(my_global_vertices)]
                    # continue below in else part
                except KeyError:
                    # this happens if my_global_vertices is not a permutation
                    # of the neighbor's face vertices. Periodicity is the only
                    # reason why that would be so.
                    my_vertices_there, axis = \
                            self.global_periodic_opposite_faces[
                                    my_global_vertices]
                    nb_face_idx = nb_face_order[frozenset(my_vertices_there)]

                    his_vertices_here, axis2 = \
                            self.global_periodic_opposite_faces[
                                    nb_all_facevertices_global[nb_face_idx]]

                    assert axis == axis2

                    nb_face_start = nb_face_starts[nb_face_idx]

                    shuffle_op = \
                            ldis.get_face_index_shuffle_to_match(
                                    my_global_vertices,
                                    his_vertices_here)

                    shuffled_other_node_indices = [nb_face_start+i
                            for i in get_shuffled_indices(
                                face_node_count, shuffle_op)]

                    from_indices.extend(shuffled_other_node_indices)

                    # check if the nodes really match up
                    if "parallel_setup" in self.debug:
                        my_node_indices = [eslice.start+i
                                for i in ldis.face_indices()[face_nr]]

                        for my_i, other_i in zip(my_node_indices,
                                shuffled_other_node_indices):
                            dist = self.nodes[my_i]-flat_nb_node_coords[other_i]
                            dist[axis] = 0
                            assert la.norm(dist) < 1e-14
                else:
                    # continue handling of nonperiodic case
                    nb_global_vertices = nb_all_facevertices_global[nb_face_idx]

                    nb_face_start = nb_face_starts[nb_face_idx]

                    shuffle_op = \
                            ldis.get_face_index_shuffle_to_match(
                                    my_global_vertices,
                                    nb_global_vertices)

                    shuffled_other_node_indices = [nb_face_start+i
                            for i in get_shuffled_indices(
                                face_node_count, shuffle_op)]

                    from_indices.extend(shuffled_other_node_indices)

                    # check if the nodes really match up
                    if "parallel_setup" in self.debug:
                        my_node_indices = [eslice.start+i
                                for i in ldis.face_indices()[face_nr]]

                        for my_i, other_i in zip(my_node_indices,
                                shuffled_other_node_indices):
                            dist = self.nodes[my_i]-flat_nb_node_coords[other_i]
                            assert la.norm(dist) < 1e-14

                # finally, unify FluxFace.h values across boundary
                nb_h = nb_h_values[nb_face_idx]
                flux_face = rank_discr_boundary.find_facepair_side((el, face_nr))
                flux_face.h = max(nb_h, flux_face.h)

            if "parallel_setup" in self.debug:
                assert len(from_indices) == len(flat_nb_node_coords)

            # construct from_neighbor_map
            self.from_neighbor_maps[rank] = \
                    self.subdiscr.prepare_from_neighbor_map(from_indices)
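# A minimal, self-contained sketch (toy data, none of hedge's classes) of the
# indexing scheme in steps 1-3 above: per-face node lists are flattened into
# one stream, and nb_face_starts records where each face's data begins, so a
# face's nodes can be addressed as a contiguous slice of the flat stream.
from pytools import flatten

nb_node_coords = [[(0.0,), (1.0,)], [(2.0,), (3.0,), (4.0,)], [(5.0,)]]
flat_nb_node_coords = list(flatten(nb_node_coords))

nb_face_starts = [0]
for node_coords in nb_node_coords[:-1]:
    nb_face_starts.append(nb_face_starts[-1] + len(node_coords))

assert nb_face_starts == [0, 2, 5]
# face 1's nodes occupy flat indices [2, 5)
assert flat_nb_node_coords[nb_face_starts[1]:nb_face_starts[1]+3] \
        == [(2.0,), (3.0,), (4.0,)]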
def initialize(self, method):
    Depositor.initialize(self, method)

    backend_class = getattr(_internal, "GridFindDepositor"
            + method.get_dimensionality_suffix())
    backend = self.backend = backend_class(method.mesh_data)

    discr = method.discretization

    grid_node_num_to_nodes = {}

    if self.brick_generator is None:
        bbox_min, bbox_max = discr.mesh.bounding_box()
        max_bbox_size = max(bbox_max-bbox_min)
        self.brick_generator = SingleBrickGenerator(
                mesh_margin=1e-3*max_bbox_size,
                overresolve=0.2)

    from pyrticle._internal import Brick
    for i, (stepwidths, origin, dims) in enumerate(
            self.brick_generator(discr)):
        backend.bricks.append(
                Brick(i, backend.grid_node_count(),
                    stepwidths, origin, dims))

    from pyrticle._internal import BoxFloat
    for eg in discr.element_groups:
        ldis = eg.local_discretization

        for el in eg.members:
            el_bbox = BoxFloat(*el.bounding_box(discr.mesh.points))
            el_slice = discr.find_el_range(el.id)

            for brk in backend.bricks:
                if brk.bounding_box().intersect(el_bbox).is_empty():
                    continue

                for node_num in range(el_slice.start, el_slice.stop):
                    try:
                        cell_number = brk.which_cell(
                                discr.nodes[node_num])
                    except ValueError:
                        pass
                    else:
                        grid_node_num_to_nodes.setdefault(
                                brk.index(cell_number), []).append(node_num)

    from pytools import flatten
    unassigned_nodes = (set(xrange(len(discr)))
            - set(flatten(grid_node_num_to_nodes.itervalues())))

    if unassigned_nodes:
        raise RuntimeError("dep_grid_find: unassigned mesh nodes found. "
                "you should specify a mesh_margin when generating "
                "bricks")

    usecounts = numpy.zeros((backend.grid_node_count(),))
    for gnn in xrange(backend.grid_node_count()):
        grid_nodes = grid_node_num_to_nodes.get(gnn, [])
        usecounts[gnn] = len(grid_nodes)
        backend.node_number_list_starts.append(len(backend.node_number_lists))
        backend.node_number_lists.extend(grid_nodes)
    backend.node_number_list_starts.append(len(backend.node_number_lists))

    if "depositor" in method.debug:
        from hedge.visualization import SiloVisualizer
        vis = SiloVisualizer(discr)
        visf = vis.make_file("grid-find-debug")
        vis.add_data(visf, [])
        self.visualize_grid_quantities(visf, [
                ("usecounts", usecounts)
                ])
        visf.close()

    # nb: the original referenced an undefined "cloud" here; "method" is the
    # only plausible referent in this scope
    if "interactive" in method.debug:
        from matplotlib.pylab import hist, show
        hist(usecounts, bins=20)
        show()
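# A minimal sketch (plain lists standing in for the backend's arrays) of the
# CSR-style layout built above: node_number_list_starts gets one entry per
# grid node plus a final sentinel, so the mesh nodes for grid node g live in
# node_number_lists[starts[g]:starts[g+1]].
grid_node_num_to_nodes = {0: [3, 7], 2: [1]}
grid_node_count = 3

node_number_lists = []
node_number_list_starts = []
for gnn in range(grid_node_count):
    node_number_list_starts.append(len(node_number_lists))
    node_number_lists.extend(grid_node_num_to_nodes.get(gnn, []))
node_number_list_starts.append(len(node_number_lists))

assert node_number_list_starts == [0, 2, 2, 3]
assert node_number_lists == [3, 7, 1]
# grid node 1 has no mesh nodes: an empty slice
assert node_number_lists[2:2] == []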
def make_conformal_mesh_ext(points, elements,
        boundary_tagger=None,
        volume_tagger=None,
        periodicity=None,
        allow_internal_boundaries=False,
        _is_rankbdry_face=None,
        ):
    """Construct a simplicial mesh.

    Face indices follow the convention for the respective element,
    such as Triangle or Tetrahedron, in this module.

    :param points: an array of vertex coordinates, given as vectors.
    :param elements: an iterable of :class:`hedge.mesh.element.Element`
      instances.
    :param boundary_tagger: A function of *(fvi, el, fn, all_v)* that
      returns a list of boundary tags for a face identified by the
      parameters.

      *fvi* is the set of vertex indices of the face in question,
      *el* is an :class:`Element` instance, *fn* is the face number within
      *el*, and *all_v* is a list of all vertices.
    :param volume_tagger: A function of *(el, all_v)* returning a list of
      volume tags for the element identified by the parameters. *el* is an
      :class:`Element` instance and *all_v* is a list of all vertex
      coordinates.
    :param periodicity: either None or a list of tuples just like the one
      documented for the `periodicity` member of class :class:`Mesh`.
    :param allow_internal_boundaries: Calls the boundary tagger for
      element-element interfaces as well. If the tagger returns an empty
      list of tags for an internal interface, it remains internal.
    :param _is_rankbdry_face: an implementation detail, should not be used
      from user code. It is a function returning whether a given face
      identified by *(element instance, face_nr)* is cut by a parallel
      mesh partition.
    """

    # input validation
    if (not isinstance(points, numpy.ndarray)
            or not points.dtype == numpy.float64):
        raise TypeError("points must be a float64 array")

    if boundary_tagger is None:
        def boundary_tagger(fvi, el, fn, all_v):
            return []

    if volume_tagger is None:
        def volume_tagger(el, all_v):
            return []

    if _is_rankbdry_face is None:
        def _is_rankbdry_face(el_face):
            return False

    dim = max(el.dimensions for el in elements)
    if periodicity is None:
        periodicity = dim*[None]
    assert len(periodicity) == dim

    # tag elements
    tag_to_elements = {TAG_NONE: [], TAG_ALL: []}
    for el in elements:
        for el_tag in volume_tagger(el, points):
            tag_to_elements.setdefault(el_tag, []).append(el)
        tag_to_elements[TAG_ALL].append(el)

    # create face_map, which is a mapping of
    # (vertices on a face) ->
    #  [(element, face_idx) for elements bordering that face]
    face_map = {}
    for el in elements:
        for fid, face_vertices in enumerate(el.faces):
            face_map.setdefault(frozenset(face_vertices), []).append((el, fid))

    # build non-periodic connectivity structures
    interfaces = []
    tag_to_boundary = {
            TAG_NONE: [],
            TAG_ALL: [],
            TAG_REALLY_ALL: [],
            }

    for face_vertices, els_faces in face_map.iteritems():
        boundary_el_faces_tags = []
        if len(els_faces) == 2:
            if allow_internal_boundaries:
                el_face_a, el_face_b = els_faces
                el_a, face_a = el_face_a
                el_b, face_b = el_face_b

                tags_a = boundary_tagger(face_vertices, el_a, face_a, points)
                tags_b = boundary_tagger(face_vertices, el_b, face_b, points)

                if not tags_a and not tags_b:
                    interfaces.append(els_faces)
                elif tags_a and tags_b:
                    boundary_el_faces_tags.append((el_face_a, tags_a))
                    boundary_el_faces_tags.append((el_face_b, tags_b))
                else:
                    raise RuntimeError("boundary tagger is inconsistent "
                            "about boundary-ness of interior interface")
            else:
                interfaces.append(els_faces)
        elif len(els_faces) == 1:
            el_face = el, face = els_faces[0]
            tags = boundary_tagger(face_vertices, el, face, points)
            boundary_el_faces_tags.append((el_face, tags))
        else:
            raise RuntimeError("face can at most border two elements")

        for el_face, tags in boundary_el_faces_tags:
            el, face = el_face
            tags = set(tags) - MESH_CREATION_TAGS
            assert not isinstance(tags, str), \
                    RuntimeError("Received string as tag list")
            assert TAG_ALL not in tags
            assert TAG_REALLY_ALL not in tags

            for btag in tags:
                tag_to_boundary.setdefault(btag, []) \
                        .append(el_face)

            if TAG_NO_BOUNDARY not in tags:
                # TAG_NO_BOUNDARY is used to mark rank interfaces
                # as not being part of the boundary
                tag_to_boundary[TAG_ALL].append(el_face)
                tag_to_boundary[TAG_REALLY_ALL].append(el_face)

    # add periodicity-induced connectivity
    from pytools import flatten, reverse_dictionary

    periodic_opposite_faces = {}
    periodic_opposite_vertices = {}

    for tag_bdries in tag_to_boundary.itervalues():
        assert len(set(tag_bdries)) == len(tag_bdries)

    for axis, axis_periodicity in enumerate(periodicity):
        if axis_periodicity is not None:
            # find faces on +-axis boundaries
            minus_tag, plus_tag = axis_periodicity
            minus_faces = tag_to_boundary.get(minus_tag, [])
            plus_faces = tag_to_boundary.get(plus_tag, [])

            # find vertex indices and points on these faces
            minus_vertex_indices = list(set(flatten(el.faces[face]
                for el, face in minus_faces)))
            plus_vertex_indices = list(set(flatten(el.faces[face]
                for el, face in plus_faces)))

            minus_z_points = [points[pi] for pi in minus_vertex_indices]
            plus_z_points = [points[pi] for pi in plus_vertex_indices]

            # find a mapping from -axis to +axis vertices
            minus_to_plus, not_found = find_matching_vertices_along_axis(
                    axis, minus_z_points, plus_z_points,
                    minus_vertex_indices, plus_vertex_indices)
            plus_to_minus = reverse_dictionary(minus_to_plus)

            for a, b in minus_to_plus.iteritems():
                periodic_opposite_vertices.setdefault(a, []).append((b, axis))
                periodic_opposite_vertices.setdefault(b, []).append((a, axis))

            # establish face connectivity
            for minus_face in minus_faces:
                minus_el, minus_fi = minus_face
                minus_fvi = minus_el.faces[minus_fi]

                try:
                    mapped_plus_fvi = tuple(minus_to_plus[i] for i in minus_fvi)
                    plus_faces = face_map[frozenset(mapped_plus_fvi)]
                    assert len(plus_faces) == 1
                except KeyError:
                    # is our periodic counterpart in a different mesh clump?
                    if _is_rankbdry_face(minus_face):
                        # if so, cool. parallel handler will take care of it.
                        continue
                    else:
                        # if not, bad.
                        raise

                plus_face = plus_faces[0]
                interfaces.append([minus_face, plus_face])

                plus_el, plus_fi = plus_face
                plus_fvi = plus_el.faces[plus_fi]

                mapped_minus_fvi = tuple(plus_to_minus[i] for i in plus_fvi)

                # periodic_opposite_faces maps a face vertex tuple from one
                # end of the periodic domain to the corresponding tuple at
                # the other end, together with the axis along which the two
                # are identified
                periodic_opposite_faces[minus_fvi] = mapped_plus_fvi, axis
                periodic_opposite_faces[plus_fvi] = mapped_minus_fvi, axis

                tag_to_boundary[TAG_ALL].remove(plus_face)
                tag_to_boundary[TAG_ALL].remove(minus_face)

                tag_to_boundary[TAG_REALLY_ALL].remove(plus_face)
                tag_to_boundary[TAG_REALLY_ALL].remove(minus_face)

    return ConformalMesh(
            points=points,
            elements=elements,
            interfaces=interfaces,
            tag_to_boundary=tag_to_boundary,
            tag_to_elements=tag_to_elements,
            periodicity=periodicity,
            periodic_opposite_faces=periodic_opposite_faces,
            periodic_opposite_vertices=periodic_opposite_vertices,
            has_internal_boundaries=allow_internal_boundaries,
            )
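# A minimal sketch (toy vertex numbering, no hedge classes) of the matching
# idea used twice above: faces are keyed by the frozenset of their vertex
# indices, so any rotation or reflection of a face's vertex tuple finds the
# same face_map entry, and interior faces are exactly those with two entries.
face_map = {}
for el, faces in [("el0", [(0, 1), (1, 2)]), ("el1", [(2, 1), (2, 3)])]:
    for fid, face_vertices in enumerate(faces):
        face_map.setdefault(frozenset(face_vertices), []).append((el, fid))

# (1, 2) on el0 and (2, 1) on el1 are the same geometric face
assert face_map[frozenset((1, 2))] == [("el0", 1), ("el1", 0)]
# interior faces border two elements; boundary faces only one
interior = [fs for fs, ef in face_map.items() if len(ef) == 2]
assert interior == [frozenset((1, 2))]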