def _assemble(f, tensor=None, bcs=None, form_compiler_parameters=None,
              inverse=False, mat_type=None, sub_mat_type=None,
              appctx={}, options_prefix=None, collect_loops=False,
              allocate_only=False):
    r"""Assemble the form or Slate expression f and return a Firedrake object
    representing the result. This will be a :class:`float` for 0-forms/rank-0
    Slate tensors, a :class:`.Function` for 1-forms/rank-1 Slate tensors and
    a :class:`.Matrix` for 2-forms/rank-2 Slate tensors.

    :arg bcs: A tuple of :class:`.DirichletBC`\s to be applied.
    :arg tensor: An existing tensor object into which the form should be
        assembled. If this is not supplied, a new tensor will be created
        for the purpose.
    :arg form_compiler_parameters: (optional) dict of parameters to pass
        to the form compiler.
    :arg inverse: (optional) if f is a 2-form, then assemble the inverse
        of the local matrices.
    :arg mat_type: (optional) type for assembled matrices, one of
        "nest", "aij", "baij", or "matfree".
    :arg sub_mat_type: (optional) type for assembled sub matrices inside
        a "nest" matrix.  One of "aij" or "baij".
    :arg appctx: Additional information to hang on the assembled
        matrix if an implicit matrix is requested (mat_type "matfree").
    :arg options_prefix: An options prefix for the PETSc matrix
        (ignored if not assembling a bilinear form).
    :arg collect_loops: (optional) if True, return the list of assembly
        loops instead of executing them (used internally by residual and
        Jacobian assembly).
    :arg allocate_only: (optional) if True, return the newly allocated
        (but unassembled) matrix.
    """
    if mat_type is None:
        mat_type = parameters.parameters["default_matrix_type"]
    if mat_type not in ["matfree", "aij", "baij", "nest"]:
        raise ValueError("Unrecognised matrix type, '%s'" % mat_type)
    if sub_mat_type is None:
        sub_mat_type = parameters.parameters["default_sub_matrix_type"]
    if sub_mat_type not in ["aij", "baij"]:
        raise ValueError("Invalid submatrix type, '%s' (not 'aij' or 'baij')" % sub_mat_type)

    if form_compiler_parameters:
        form_compiler_parameters = form_compiler_parameters.copy()
    else:
        form_compiler_parameters = {}
    form_compiler_parameters["assemble_inverse"] = inverse

    topology = f.ufl_domains()[0].topology
    for m in f.ufl_domains():
        # Ensure mesh is "initialised" (could have got here without
        # building a functionspace (e.g. if integrating a constant)).
        m.init()
        if m.topology != topology:
            raise NotImplementedError("All integration domains must share a mesh topology.")

    for o in chain(f.arguments(), f.coefficients()):
        domain = o.ufl_domain()
        if domain is not None and domain.topology != topology:
            raise NotImplementedError("Assembly with multiple meshes not supported.")

    if isinstance(f, slate.TensorBase):
        kernels = slac.compile_expression(f, tsfc_parameters=form_compiler_parameters)
        integral_types = [kernel.kinfo.integral_type for kernel in kernels]
    else:
        kernels = tsfc_interface.compile_form(f, "form", parameters=form_compiler_parameters, inverse=inverse)
        integral_types = [integral.integral_type() for integral in f.integrals()]

    rank = len(f.arguments())
    is_mat = rank == 2
    is_vec = rank == 1

    if any((coeff.function_space() and coeff.function_space().component is not None)
           for coeff in f.coefficients()):
        raise NotImplementedError("Integration of subscripted VFS not yet implemented")

    if inverse and rank != 2:
        raise ValueError("Can only assemble the inverse of a 2-form")

    zero_tensor = lambda: None

    if is_mat:
        matfree = mat_type == "matfree"
        nest = mat_type == "nest"
        if nest:
            baij = sub_mat_type == "baij"
        else:
            baij = mat_type == "baij"
        if matfree:  # intercept matrix-free matrices here
            if inverse:
                raise NotImplementedError("Inverse not implemented with matfree")
            if collect_loops:
                raise NotImplementedError("Can't collect loops with matfree")
            if tensor is None:
                return matrix.ImplicitMatrix(f, bcs,
                                             fc_params=form_compiler_parameters,
                                             appctx=appctx,
                                             options_prefix=options_prefix)
            if not isinstance(tensor, matrix.ImplicitMatrix):
                raise ValueError("Expecting implicit matrix with matfree")
            tensor.assemble()
            return tensor
        test, trial = f.arguments()

        map_pairs = []
        cell_domains = []
        exterior_facet_domains = []
        interior_facet_domains = []
        if tensor is None:
            # For horizontal facets of extruded meshes, the corresponding domain
            # in the base mesh is the cell domain. Hence all the maps used for top
            # bottom and interior horizontal facets will use the cell to dofs map
            # coming from the base mesh as a starting point for the actual dynamic map
            # computation.
            for integral_type in integral_types:
                if integral_type == "cell":
                    cell_domains.append(op2.ALL)
                elif integral_type == "exterior_facet":
                    exterior_facet_domains.append(op2.ALL)
                elif integral_type == "interior_facet":
                    interior_facet_domains.append(op2.ALL)
                elif integral_type == "exterior_facet_bottom":
                    cell_domains.append(op2.ON_BOTTOM)
                elif integral_type == "exterior_facet_top":
                    cell_domains.append(op2.ON_TOP)
                elif integral_type == "exterior_facet_vert":
                    exterior_facet_domains.append(op2.ALL)
                elif integral_type == "interior_facet_horiz":
                    cell_domains.append(op2.ON_INTERIOR_FACETS)
                elif integral_type == "interior_facet_vert":
                    interior_facet_domains.append(op2.ALL)
                else:
                    raise ValueError('Unknown integral type "%s"' % integral_type)

            # To avoid an extra check for extruded domains, the maps that are being passed in
            # are DecoratedMaps. For the non-extruded case the DecoratedMaps don't restrict the
            # space over which we iterate as the domains are dropped at Sparsity construction
            # time. In the extruded case the cell domains are used to identify the regions of the
            # mesh which require allocation in the sparsity.
            if cell_domains:
                map_pairs.append((op2.DecoratedMap(test.cell_node_map(), cell_domains),
                                  op2.DecoratedMap(trial.cell_node_map(), cell_domains)))
            if exterior_facet_domains:
                map_pairs.append((op2.DecoratedMap(test.exterior_facet_node_map(), exterior_facet_domains),
                                  op2.DecoratedMap(trial.exterior_facet_node_map(), exterior_facet_domains)))
            if interior_facet_domains:
                map_pairs.append((op2.DecoratedMap(test.interior_facet_node_map(), interior_facet_domains),
                                  op2.DecoratedMap(trial.interior_facet_node_map(), interior_facet_domains)))

            map_pairs = tuple(map_pairs)
            # Construct OP2 Mat to assemble into
            fs_names = (test.function_space().name, trial.function_space().name)

            try:
                sparsity = op2.Sparsity((test.function_space().dof_dset,
                                         trial.function_space().dof_dset),
                                        map_pairs,
                                        "%s_%s_sparsity" % fs_names,
                                        nest=nest,
                                        block_sparse=baij)
            except SparsityFormatError:
                raise ValueError("Monolithic matrix assembly is not supported for systems with R-space blocks.")

            result_matrix = matrix.Matrix(f, bcs, mat_type, sparsity, numpy.float64,
                                          "%s_%s_matrix" % fs_names,
                                          options_prefix=options_prefix)
            tensor = result_matrix._M
        else:
            if isinstance(tensor, matrix.ImplicitMatrix):
                raise ValueError("Expecting matfree with implicit matrix")

            result_matrix = tensor
            # Replace any bcs on the tensor we passed in
            result_matrix.bcs = bcs
            tensor = tensor._M
            zero_tensor = tensor.zero

        if result_matrix.block_shape != (1, 1) and mat_type == "baij":
            raise ValueError("BAIJ matrix type makes no sense for mixed spaces, use 'aij'")

        def mat(testmap, trialmap, i, j):
            m = testmap(test.function_space()[i])
            n = trialmap(trial.function_space()[j])
            maps = (m[op2.i[0]] if m else None,
                    n[op2.i[1 if m else 0]] if n else None)
            return tensor[i, j](op2.INC, maps)

        result = lambda: result_matrix
        if allocate_only:
            result_matrix._assembly_callback = None
            return result_matrix
    elif is_vec:
        test = f.arguments()[0]
        if tensor is None:
            result_function = function.Function(test.function_space())
            tensor = result_function.dat
        else:
            result_function = tensor
            tensor = result_function.dat
        zero_tensor = tensor.zero

        def vec(testmap, i):
            _testmap = testmap(test.function_space()[i])
            return tensor[i](op2.INC, _testmap[op2.i[0]] if _testmap else None)
        result = lambda: result_function
    else:
        # 0-forms are always scalar
        if tensor is None:
            tensor = op2.Global(1, [0.0])
        else:
            raise ValueError("Can't assemble 0-form into existing tensor")
        result = lambda: tensor.data[0]

    coefficients = f.coefficients()
    domains = f.ufl_domains()

    # These will be used to correctly interpret the "otherwise" subdomain
    all_integer_subdomain_ids = defaultdict(list)
    for k in kernels:
        if k.kinfo.subdomain_id != "otherwise":
            all_integer_subdomain_ids[k.kinfo.integral_type].append(k.kinfo.subdomain_id)
    for k, v in all_integer_subdomain_ids.items():
        all_integer_subdomain_ids[k] = tuple(sorted(v))

    # Since applying boundary conditions to a matrix changes the
    # initial assembly, to support:
    #     A = assemble(a)
    #     bc.apply(A)
    #     solve(A, ...)
    # we need to defer actually assembling the matrix until just
    # before we need it (when we know if there are any bcs to be
    # applied).  To do so, we build a closure that carries out the
    # assembly and stash that on the Matrix object.  When we hit a
    # solve, we funcall the closure with any bcs the Matrix now has to
    # assemble it.

    # In collecting loops mode, we collect the loops, and assume the
    # boundary conditions provided are the ones we want.  It therefore
    # is only used inside residual and jacobian assembly.
    loops = []

    def thunk(bcs):
        if collect_loops:
            loops.append(zero_tensor)
        else:
            zero_tensor()
        for indices, kinfo in kernels:
            kernel = kinfo.kernel
            integral_type = kinfo.integral_type
            domain_number = kinfo.domain_number
            subdomain_id = kinfo.subdomain_id
            coeff_map = kinfo.coefficient_map
            pass_layer_arg = kinfo.pass_layer_arg
            needs_orientations = kinfo.oriented
            needs_cell_facets = kinfo.needs_cell_facets
            needs_cell_sizes = kinfo.needs_cell_sizes

            m = domains[domain_number]
            subdomain_data = f.subdomain_data()[m]
            # Find argument space indices
            if is_mat:
                i, j = indices
            elif is_vec:
                i, = indices
            else:
                assert len(indices) == 0

            sdata = subdomain_data.get(integral_type, None)
            if integral_type != 'cell' and sdata is not None:
                raise NotImplementedError("subdomain_data only supported with cell integrals.")

            # Extract block from tensor and test/trial spaces
            # FIXME Ugly variable renaming required because functions are not
            # lexical closures in Python and we're writing to these variables
            if is_mat and result_matrix.block_shape > (1, 1):
                tsbc = []
                trbc = []
                # Unwind ComponentFunctionSpace to check for matching BCs
                for bc in bcs:
                    fs = bc.function_space()
                    if fs.component is not None:
                        fs = fs.parent
                    if fs.index == i:
                        tsbc.append(bc)
                    if fs.index == j:
                        trbc.append(bc)
            elif is_mat:
                tsbc, trbc = bcs, bcs

            # Now build arguments for the par_loop
            kwargs = {}
            # Some integrals require non-coefficient arguments at the
            # end (facet number information).
            extra_args = []
            # Decoration for applying to matrix maps in extruded case
            decoration = None
            itspace = m.measure_set(integral_type, subdomain_id,
                                    all_integer_subdomain_ids)
            if integral_type == "cell":
                itspace = sdata or itspace
                if subdomain_id not in ["otherwise", "everywhere"] and sdata is not None:
                    raise ValueError("Cannot use subdomain data and subdomain_id")

                def get_map(x, bcs=None, decoration=None):
                    return x.cell_node_map(bcs)
            elif integral_type in ("exterior_facet", "exterior_facet_vert"):
                extra_args.append(m.exterior_facets.local_facet_dat(op2.READ))

                def get_map(x, bcs=None, decoration=None):
                    return x.exterior_facet_node_map(bcs)
            elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
                # In the case of extruded meshes with horizontal facet integrals, two
                # parallel loops will (potentially) get created and called based on the
                # domain id: interior horizontal, bottom or top.
                decoration = {"exterior_facet_top": op2.ON_TOP,
                              "exterior_facet_bottom": op2.ON_BOTTOM}[integral_type]
                kwargs["iterate"] = decoration

                def get_map(x, bcs=None, decoration=None):
                    map_ = x.cell_node_map(bcs)
                    if decoration is not None:
                        return op2.DecoratedMap(map_, decoration)
                    return map_
            elif integral_type in ("interior_facet", "interior_facet_vert"):
                extra_args.append(m.interior_facets.local_facet_dat(op2.READ))

                def get_map(x, bcs=None, decoration=None):
                    return x.interior_facet_node_map(bcs)
            elif integral_type == "interior_facet_horiz":
                decoration = op2.ON_INTERIOR_FACETS
                kwargs["iterate"] = decoration

                def get_map(x, bcs=None, decoration=None):
                    map_ = x.cell_node_map(bcs)
                    if decoration is not None:
                        return op2.DecoratedMap(map_, decoration)
                    return map_
            else:
                raise ValueError("Unknown integral type '%s'" % integral_type)

            # Output argument
            if is_mat:
                tensor_arg = mat(lambda s: get_map(s, tsbc, decoration),
                                 lambda s: get_map(s, trbc, decoration),
                                 i, j)
            elif is_vec:
                tensor_arg = vec(lambda s: get_map(s), i)
            else:
                tensor_arg = tensor(op2.INC)

            coords = m.coordinates
            args = [kernel, itspace, tensor_arg,
                    coords.dat(op2.READ, get_map(coords)[op2.i[0]])]
            if needs_orientations:
                o = m.cell_orientations()
                args.append(o.dat(op2.READ, get_map(o)[op2.i[0]]))
            if needs_cell_sizes:
                o = m.cell_sizes
                args.append(o.dat(op2.READ, get_map(o)[op2.i[0]]))

            for n in coeff_map:
                c = coefficients[n]
                for c_ in c.split():
                    m_ = get_map(c_)
                    args.append(c_.dat(op2.READ, m_ and m_[op2.i[0]]))
            if needs_cell_facets:
                assert integral_type == "cell"
                extra_args.append(m.cell_to_facets(op2.READ))

            args.extend(extra_args)
            kwargs["pass_layer_arg"] = pass_layer_arg
            try:
                with collecting_loops(collect_loops):
                    loops.append(op2.par_loop(*args, **kwargs))
            except MapValueError:
                raise RuntimeError("Integral measure does not match measure of all coefficients/arguments")

        # Must apply bcs outside loop over kernels because we may wish
        # to apply bcs to a block which is otherwise zero, and
        # therefore does not have an associated kernel.
        if bcs is not None and is_mat:
            for bc in bcs:
                fs = bc.function_space()
                # Evaluate this outwith a "collecting_loops" block,
                # since creation of the bc nodes actually can create a
                # par_loop.
                nodes = bc.nodes
                if len(fs) > 1:
                    raise RuntimeError("""Cannot apply boundary conditions to full mixed space. Did you forget to index it?""")
                shape = result_matrix.block_shape
                with collecting_loops(collect_loops):
                    for i in range(shape[0]):
                        for j in range(shape[1]):
                            # Set diagonal entries on bc nodes to 1 if the current
                            # block is on the matrix diagonal and its index matches the
                            # index of the function space the bc is defined on.
                            if i != j:
                                continue
                            if fs.component is None and fs.index is not None:
                                # Mixed, index (no ComponentFunctionSpace)
                                if fs.index == i:
                                    loops.append(tensor[i, j].set_local_diagonal_entries(nodes))
                            elif fs.component is not None:
                                # ComponentFunctionSpace, check parent index
                                if fs.parent.index is not None:
                                    # Mixed, index doesn't match
                                    if fs.parent.index != i:
                                        continue
                                # Index matches
                                loops.append(tensor[i, j].set_local_diagonal_entries(nodes, idx=fs.component))
                            elif fs.index is None:
                                loops.append(tensor[i, j].set_local_diagonal_entries(nodes))
                            else:
                                raise RuntimeError("Unhandled BC case")
        if bcs is not None and is_vec:
            if len(bcs) > 0 and collect_loops:
                raise NotImplementedError("Loop collection not handled in this case")
            for bc in bcs:
                bc.apply(result_function)
        if is_mat:
            # Queue up matrix assembly (after we've done all the other operations)
            loops.append(tensor.assemble())
        return result()

    if collect_loops:
        thunk(bcs)
        return loops

    if is_mat:
        result_matrix._assembly_callback = thunk
        return result()
    else:
        return thunk(bcs)
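# Usage sketch of the deferred-assembly behaviour documented above
# (illustrative only, not part of the module; the mesh/space names below
# are hypothetical).  Because the thunk is stashed on the Matrix and only
# funcalled at solve time, bcs attached after `assemble` still take effect:
#
#   from firedrake import *
#   mesh = UnitSquareMesh(8, 8)
#   V = FunctionSpace(mesh, "CG", 1)
#   u, v = TrialFunction(V), TestFunction(V)
#   A = assemble(inner(grad(u), grad(v))*dx)  # thunk stashed, not yet run
#   bc = DirichletBC(V, 0, "on_boundary")
#   bc.apply(A)                               # recorded on A.bcs
#   solve(A, Function(V), assemble(v*dx))     # thunk runs here, with bc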
def dg_injection_kernel(Vf, Vc, ncell):
    from firedrake import Tensor, AssembledVector, TestFunction, TrialFunction
    from firedrake.slate.slac import compile_expression
    macro_builder = MacroKernelBuilder(ScalarType_c, ncell)
    f = ufl.Coefficient(Vf)
    macro_builder.set_coefficients([f])
    macro_builder.set_coordinates(Vf.mesh())

    Vfe = create_element(Vf.ufl_element())
    macro_quadrature_rule = make_quadrature(Vfe.cell, estimate_total_polynomial_degree(ufl.inner(f, f)))
    index_cache = {}
    parameters = default_parameters()
    integration_dim, entity_ids = lower_integral_type(Vfe.cell, "cell")
    macro_cfg = dict(interface=macro_builder,
                     ufl_cell=Vf.ufl_cell(),
                     precision=parameters["precision"],
                     integration_dim=integration_dim,
                     entity_ids=entity_ids,
                     index_cache=index_cache,
                     quadrature_rule=macro_quadrature_rule)

    fexpr, = fem.compile_ufl(f, **macro_cfg)
    X = ufl.SpatialCoordinate(Vf.mesh())
    C_a, = fem.compile_ufl(X, **macro_cfg)
    detJ = ufl_utils.preprocess_expression(abs(ufl.JacobianDeterminant(f.ufl_domain())))
    macro_detJ, = fem.compile_ufl(detJ, **macro_cfg)

    Vce = create_element(Vc.ufl_element())

    coarse_builder = firedrake_interface.KernelBuilder("cell", "otherwise", 0, ScalarType_c)
    coarse_builder.set_coordinates(Vc.mesh())
    argument_multiindices = (Vce.get_indices(), )
    argument_multiindex, = argument_multiindices
    return_variable, = coarse_builder.set_arguments((ufl.TestFunction(Vc), ), argument_multiindices)

    integration_dim, entity_ids = lower_integral_type(Vce.cell, "cell")
    # Midpoint quadrature for jacobian on coarse cell.
    quadrature_rule = make_quadrature(Vce.cell, 0)
    coarse_cfg = dict(interface=coarse_builder,
                      ufl_cell=Vc.ufl_cell(),
                      precision=parameters["precision"],
                      integration_dim=integration_dim,
                      entity_ids=entity_ids,
                      index_cache=index_cache,
                      quadrature_rule=quadrature_rule)

    X = ufl.SpatialCoordinate(Vc.mesh())
    K = ufl_utils.preprocess_expression(ufl.JacobianInverse(Vc.mesh()))
    C_0, = fem.compile_ufl(X, **coarse_cfg)
    K, = fem.compile_ufl(K, **coarse_cfg)

    i = gem.Index()
    j = gem.Index()

    C_0 = gem.Indexed(C_0, (j, ))
    C_0 = gem.index_sum(C_0, quadrature_rule.point_set.indices)
    C_a = gem.Indexed(C_a, (j, ))
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), C_a))

    K_ij = gem.Indexed(K, (i, j))
    K_ij = gem.index_sum(K_ij, quadrature_rule.point_set.indices)
    X_a = gem.index_sum(gem.Product(K_ij, X_a), (j, ))
    C_0, = quadrature_rule.point_set.points
    C_0 = gem.Indexed(gem.Literal(C_0), (i, ))
    # fine quad points in coarse reference space.
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), X_a))
    X_a = gem.ComponentTensor(X_a, (i, ))

    # Coarse basis function evaluated at fine quadrature points
    phi_c = fem.fiat_to_ufl(Vce.point_evaluation(0, X_a, (Vce.cell.get_dimension(), 0)), 0)

    tensor_indices = tuple(gem.Index(extent=d) for d in f.ufl_shape)

    phi_c = gem.Indexed(phi_c, argument_multiindex + tensor_indices)
    fexpr = gem.Indexed(fexpr, tensor_indices)
    quadrature_weight = macro_quadrature_rule.weight_expression
    expr = gem.Product(gem.IndexSum(gem.Product(phi_c, fexpr), tensor_indices),
                       gem.Product(macro_detJ, quadrature_weight))

    quadrature_indices = macro_builder.indices + macro_quadrature_rule.point_set.indices
    reps = spectral.Integrals([expr], quadrature_indices, argument_multiindices, parameters)
    assignments = spectral.flatten([(return_variable, reps)], index_cache)
    return_variables, expressions = zip(*assignments)
    expressions = impero_utils.preprocess_gem(expressions, **spectral.finalise_options)
    assignments = list(zip(return_variables, expressions))
    impero_c = impero_utils.compile_gem(assignments,
                                        quadrature_indices + argument_multiindex,
                                        remove_zeros=True)

    index_names = []

    def name_index(index, name):
        index_names.append((index, name))
        if index in index_cache:
            for multiindex, suffix in zip(index_cache[index],
                                          string.ascii_lowercase):
                name_multiindex(multiindex, name + suffix)

    def name_multiindex(multiindex, name):
        if len(multiindex) == 1:
            name_index(multiindex[0], name)
        else:
            for i, index in enumerate(multiindex):
                name_index(index, name + str(i))

    name_multiindex(quadrature_indices, 'ip')
    for multiindex, name in zip(argument_multiindices, ['j', 'k']):
        name_multiindex(multiindex, name)

    index_names.extend(zip(macro_builder.indices, ["entity"]))
    body = generate_coffee(impero_c, index_names, parameters["precision"], ScalarType_c)

    retarg = ast.Decl(ScalarType_c, ast.Symbol("R", rank=(Vce.space_dimension(), )))
    local_tensor = coarse_builder.local_tensor
    local_tensor.init = ast.ArrayInit(numpy.zeros(Vce.space_dimension(), dtype=ScalarType_c))
    body.children.insert(0, local_tensor)
    args = [retarg] + macro_builder.kernel_args + [macro_builder.coordinates_arg,
                                                   coarse_builder.coordinates_arg]

    # Now we have the kernel that computes <f, phi_c>dx_c
    # So now we need to hit it with the inverse mass matrix on dx_c
    u = TrialFunction(Vc)
    v = TestFunction(Vc)
    expr = Tensor(ufl.inner(u, v)*ufl.dx).inv * AssembledVector(ufl.Coefficient(Vc))
    Ainv, = compile_expression(expr)
    Ainv = Ainv.kinfo.kernel
    A = ast.Symbol(local_tensor.sym.symbol)
    R = ast.Symbol("R")
    body.children.append(ast.FunCall(Ainv.name, R, coarse_builder.coordinates_arg.sym, A))
    from coffee.base import Node
    assert isinstance(Ainv._code, Node)
    return op2.Kernel(ast.Node([Ainv._code,
                                ast.FunDecl("void", "pyop2_kernel_injection_dg", args, body,
                                            pred=["static", "inline"])]),
                      name="pyop2_kernel_injection_dg",
                      cpp=True,
                      include_dirs=Ainv._include_dirs,
                      headers=Ainv._headers)
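# What the generated kernel computes, stated variationally (an illustrative
# UFL sketch only; this snippet is never executed here, the kernel inlines
# the equivalent local operations, and `f_on_macro` is a hypothetical name
# for the fine-cell data gathered over the `ncell` fine cells nested in one
# coarse cell):
#
#   u, v = TrialFunction(Vc), TestFunction(Vc)
#   # per coarse cell, solve  inner(u, v)*dx == inner(f, v)*dx,
#   # with the right-hand side accumulated over the nested fine cells:
#   u_c = Tensor(ufl.inner(u, v)*ufl.dx).inv * AssembledVector(f_on_macro)
#
# Inverting the mass matrix cell-locally is legitimate because Vc is
# discontinuous, so the mass matrix is block-diagonal over cells.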
def _assemble(f, tensor=None, bcs=None, form_compiler_parameters=None,
              inverse=False, mat_type=None, sub_mat_type=None,
              appctx={}, options_prefix=None, assemble_now=False,
              allocate_only=False, zero_tensor=True):
    r"""Assemble the form or Slate expression f and return a Firedrake object
    representing the result. This will be a :class:`float` for 0-forms/rank-0
    Slate tensors, a :class:`.Function` for 1-forms/rank-1 Slate tensors and
    a :class:`.Matrix` for 2-forms/rank-2 Slate tensors.

    :arg bcs: A tuple of :class:`.DirichletBC`\s and/or
        :class:`.EquationBCSplit`\s to be applied.
    :arg tensor: An existing tensor object into which the form should be
        assembled. If this is not supplied, a new tensor will be created
        for the purpose.
    :arg form_compiler_parameters: (optional) dict of parameters to pass
        to the form compiler.
    :arg inverse: (optional) if f is a 2-form, then assemble the inverse
        of the local matrices.
    :arg mat_type: (optional) type for assembled matrices, one of
        "nest", "aij", "baij", or "matfree".
    :arg sub_mat_type: (optional) type for assembled sub matrices inside
        a "nest" matrix.  One of "aij" or "baij".
    :arg appctx: Additional information to hang on the assembled
        matrix if an implicit matrix is requested (mat_type "matfree").
    :arg options_prefix: An options prefix for the PETSc matrix
        (ignored if not assembling a bilinear form).
    :arg assemble_now: (optional) if True, the final yielded callable
        produces the assembled result.
    :arg allocate_only: (optional) if True, stop after allocating the
        (unassembled) matrix.
    :arg zero_tensor: (optional) if False, accumulate into the existing
        tensor without zeroing it first (used by the recursive
        :class:`.EquationBCSplit` assembly below).
    """
    if mat_type is None:
        mat_type = parameters.parameters["default_matrix_type"]
    if mat_type not in ["matfree", "aij", "baij", "nest"]:
        raise ValueError("Unrecognised matrix type, '%s'" % mat_type)
    if sub_mat_type is None:
        sub_mat_type = parameters.parameters["default_sub_matrix_type"]
    if sub_mat_type not in ["aij", "baij"]:
        raise ValueError("Invalid submatrix type, '%s' (not 'aij' or 'baij')" % sub_mat_type)

    if form_compiler_parameters:
        form_compiler_parameters = form_compiler_parameters.copy()
    else:
        form_compiler_parameters = {}
    form_compiler_parameters["assemble_inverse"] = inverse

    topology = f.ufl_domains()[0].topology
    for m in f.ufl_domains():
        # Ensure mesh is "initialised" (could have got here without
        # building a functionspace (e.g. if integrating a constant)).
        m.init()
        if m.topology != topology:
            raise NotImplementedError("All integration domains must share a mesh topology.")

    for o in chain(f.arguments(), f.coefficients()):
        domain = o.ufl_domain()
        if domain is not None and domain.topology != topology:
            raise NotImplementedError("Assembly with multiple meshes not supported.")

    if isinstance(f, slate.TensorBase):
        kernels = slac.compile_expression(f, tsfc_parameters=form_compiler_parameters)
        integral_types = [kernel.kinfo.integral_type for kernel in kernels]
    else:
        kernels = tsfc_interface.compile_form(f, "form", parameters=form_compiler_parameters, inverse=inverse)
        integral_types = [integral.integral_type() for integral in f.integrals()]
        if bcs is not None:
            for bc in bcs:
                integral_types += [integral.integral_type() for integral in bc.integrals()]

    rank = len(f.arguments())
    is_mat = rank == 2
    is_vec = rank == 1

    if any((coeff.function_space() and coeff.function_space().component is not None)
           for coeff in f.coefficients()):
        raise NotImplementedError("Integration of subscripted VFS not yet implemented")

    if inverse and rank != 2:
        raise ValueError("Can only assemble the inverse of a 2-form")

    zero_tensor_parloop = lambda: None

    if is_mat:
        matfree = mat_type == "matfree"
        nest = mat_type == "nest"
        if nest:
            baij = sub_mat_type == "baij"
        else:
            baij = mat_type == "baij"
        # intercept matrix-free matrices here
        if matfree:
            if inverse:
                raise NotImplementedError("Inverse not implemented with matfree")
            if tensor is None:
                result_matrix = matrix.ImplicitMatrix(f, bcs,
                                                      fc_params=form_compiler_parameters,
                                                      appctx=appctx,
                                                      options_prefix=options_prefix)
                yield lambda: result_matrix
                # NOTE: a bare return (not "raise StopIteration()") must be
                # used to finish a generator: PEP 479 turns StopIteration
                # raised inside a generator into a RuntimeError.
                return
            if not isinstance(tensor, matrix.ImplicitMatrix):
                raise ValueError("Expecting implicit matrix with matfree")
            tensor.assemble()
            yield lambda: tensor
            return
        test, trial = f.arguments()

        map_pairs = []
        cell_domains = []
        exterior_facet_domains = []
        interior_facet_domains = []
        if tensor is None:
            # For horizontal facets of extruded meshes, the corresponding domain
            # in the base mesh is the cell domain. Hence all the maps used for top
            # bottom and interior horizontal facets will use the cell to dofs map
            # coming from the base mesh as a starting point for the actual dynamic map
            # computation.
            for integral_type in integral_types:
                if integral_type == "cell":
                    cell_domains.append(op2.ALL)
                elif integral_type == "exterior_facet":
                    exterior_facet_domains.append(op2.ALL)
                elif integral_type == "interior_facet":
                    interior_facet_domains.append(op2.ALL)
                elif integral_type == "exterior_facet_bottom":
                    cell_domains.append(op2.ON_BOTTOM)
                elif integral_type == "exterior_facet_top":
                    cell_domains.append(op2.ON_TOP)
                elif integral_type == "exterior_facet_vert":
                    exterior_facet_domains.append(op2.ALL)
                elif integral_type == "interior_facet_horiz":
                    cell_domains.append(op2.ON_INTERIOR_FACETS)
                elif integral_type == "interior_facet_vert":
                    interior_facet_domains.append(op2.ALL)
                else:
                    raise ValueError('Unknown integral type "%s"' % integral_type)

            # Used for the sparsity construction
            iteration_regions = []
            if cell_domains:
                map_pairs.append((test.cell_node_map(), trial.cell_node_map()))
                iteration_regions.append(tuple(cell_domains))
            if exterior_facet_domains:
                map_pairs.append((test.exterior_facet_node_map(), trial.exterior_facet_node_map()))
                iteration_regions.append(tuple(exterior_facet_domains))
            if interior_facet_domains:
                map_pairs.append((test.interior_facet_node_map(), trial.interior_facet_node_map()))
                iteration_regions.append(tuple(interior_facet_domains))

            map_pairs = tuple(map_pairs)
            # Construct OP2 Mat to assemble into
            fs_names = (test.function_space().name, trial.function_space().name)

            try:
                sparsity = op2.Sparsity((test.function_space().dof_dset,
                                         trial.function_space().dof_dset),
                                        map_pairs,
                                        iteration_regions=iteration_regions,
                                        name="%s_%s_sparsity" % fs_names,
                                        nest=nest,
                                        block_sparse=baij)
            except SparsityFormatError:
                raise ValueError("Monolithic matrix assembly is not supported for systems with R-space blocks.")

            result_matrix = matrix.Matrix(f, bcs, mat_type, sparsity, numpy.float64,
                                          "%s_%s_matrix" % fs_names,
                                          options_prefix=options_prefix)
            tensor = result_matrix._M
        else:
            if isinstance(tensor, matrix.ImplicitMatrix):
                raise ValueError("Expecting matfree with implicit matrix")

            result_matrix = tensor
            tensor = tensor._M
            zero_tensor_parloop = tensor.zero

        if result_matrix.block_shape != (1, 1) and mat_type == "baij":
            raise ValueError("BAIJ matrix type makes no sense for mixed spaces, use 'aij'")

        def mat(testmap, trialmap, rowbc, colbc, i, j):
            m = testmap(test.function_space()[i])
            n = trialmap(trial.function_space()[j])
            maps = (m if m else None, n if n else None)

            rlgmap, clgmap = tensor[i, j].local_to_global_maps
            V = test.function_space()[i]
            rlgmap = V.local_to_global_map(rowbc, lgmap=rlgmap)
            V = trial.function_space()[j]
            clgmap = V.local_to_global_map(colbc, lgmap=clgmap)
            if rowbc is None:
                rowbc = []
            if colbc is None:
                colbc = []
            unroll = any(bc.function_space().component is not None
                         for bc in chain(rowbc, colbc) if bc is not None)
            return tensor[i, j](op2.INC, maps, lgmaps=(rlgmap, clgmap), unroll_map=unroll)

        result = lambda: result_matrix
        if allocate_only:
            yield result
            return
    elif is_vec:
        test = f.arguments()[0]
        if tensor is None:
            result_function = function.Function(test.function_space())
            tensor = result_function.dat
        else:
            result_function = tensor
            tensor = result_function.dat
        zero_tensor_parloop = tensor.zero

        def vec(testmap, i):
            _testmap = testmap(test.function_space()[i])
            return tensor[i](op2.INC, _testmap if _testmap else None)
        result = lambda: result_function
    else:
        # 0-forms are always scalar
        if tensor is None:
            tensor = op2.Global(1, [0.0])
        else:
            raise ValueError("Can't assemble 0-form into existing tensor")
        result = lambda: tensor.data[0]

    coefficients = f.coefficients()
    domains = f.ufl_domains()

    # These will be used to correctly interpret the "otherwise" subdomain
    all_integer_subdomain_ids = defaultdict(list)
    for k in kernels:
        if k.kinfo.subdomain_id != "otherwise":
            all_integer_subdomain_ids[k.kinfo.integral_type].append(k.kinfo.subdomain_id)
    for k, v in all_integer_subdomain_ids.items():
        all_integer_subdomain_ids[k] = tuple(sorted(v))

    # In collecting loops mode, we collect the loops, and assume the
    # boundary conditions provided are the ones we want.  It therefore
    # is only used inside residual and jacobian assembly.
    if zero_tensor:
        yield zero_tensor_parloop
    for indices, kinfo in kernels:
        kernel = kinfo.kernel
        integral_type = kinfo.integral_type
        domain_number = kinfo.domain_number
        subdomain_id = kinfo.subdomain_id
        coeff_map = kinfo.coefficient_map
        pass_layer_arg = kinfo.pass_layer_arg
        needs_orientations = kinfo.oriented
        needs_cell_facets = kinfo.needs_cell_facets
        needs_cell_sizes = kinfo.needs_cell_sizes

        m = domains[domain_number]
        subdomain_data = f.subdomain_data()[m]
        # Find argument space indices
        if is_mat:
            i, j = indices
        elif is_vec:
            i, = indices
        else:
            assert len(indices) == 0

        sdata = subdomain_data.get(integral_type, None)
        if integral_type != 'cell' and sdata is not None:
            raise NotImplementedError("subdomain_data only supported with cell integrals.")

        # Extract block from tensor and test/trial spaces
        # FIXME Ugly variable renaming required because functions are not
        # lexical closures in Python and we're writing to these variables
        if is_mat:
            if bcs is not None:
                tsbc = list(bc for bc in chain(*bcs))
                if result_matrix.block_shape > (1, 1):
                    trbc = [bc for bc in tsbc
                            if bc.function_space_index() == j and isinstance(bc, DirichletBC)]
                    tsbc = [bc for bc in tsbc if bc.function_space_index() == i]
                else:
                    trbc = [bc for bc in tsbc if isinstance(bc, DirichletBC)]
            else:
                tsbc = []
                trbc = []

        # Now build arguments for the par_loop
        kwargs = {}
        # Some integrals require non-coefficient arguments at the
        # end (facet number information).
        extra_args = []
        # Decoration for applying to matrix maps in extruded case
        decoration = None
        itspace = m.measure_set(integral_type, subdomain_id,
                                all_integer_subdomain_ids)
        if integral_type == "cell":
            itspace = sdata or itspace
            if subdomain_id not in ["otherwise", "everywhere"] and sdata is not None:
                raise ValueError("Cannot use subdomain data and subdomain_id")

            def get_map(x):
                return x.cell_node_map()
        elif integral_type in ("exterior_facet", "exterior_facet_vert"):
            extra_args.append(m.exterior_facets.local_facet_dat(op2.READ))

            def get_map(x):
                return x.exterior_facet_node_map()
        elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
            # In the case of extruded meshes with horizontal facet integrals, two
            # parallel loops will (potentially) get created and called based on the
            # domain id: interior horizontal, bottom or top.
            decoration = {"exterior_facet_top": op2.ON_TOP,
                          "exterior_facet_bottom": op2.ON_BOTTOM}[integral_type]
            kwargs["iterate"] = decoration

            def get_map(x):
                return x.cell_node_map()
        elif integral_type in ("interior_facet", "interior_facet_vert"):
            extra_args.append(m.interior_facets.local_facet_dat(op2.READ))

            def get_map(x):
                return x.interior_facet_node_map()
        elif integral_type == "interior_facet_horiz":
            decoration = op2.ON_INTERIOR_FACETS
            kwargs["iterate"] = decoration

            def get_map(x):
                return x.cell_node_map()
        else:
            raise ValueError("Unknown integral type '%s'" % integral_type)

        # Output argument
        if is_mat:
            tensor_arg = mat(lambda s: get_map(s), lambda s: get_map(s),
                             tsbc, trbc, i, j)
        elif is_vec:
            tensor_arg = vec(lambda s: get_map(s), i)
        else:
            tensor_arg = tensor(op2.INC)

        coords = m.coordinates
        args = [kernel, itspace, tensor_arg,
                coords.dat(op2.READ, get_map(coords))]
        if needs_orientations:
            o = m.cell_orientations()
            args.append(o.dat(op2.READ, get_map(o)))
        if needs_cell_sizes:
            o = m.cell_sizes
            args.append(o.dat(op2.READ, get_map(o)))

        for n in coeff_map:
            c = coefficients[n]
            for c_ in c.split():
                m_ = get_map(c_)
                args.append(c_.dat(op2.READ, m_))
        if needs_cell_facets:
            assert integral_type == "cell"
            extra_args.append(m.cell_to_facets(op2.READ))

        args.extend(extra_args)
        kwargs["pass_layer_arg"] = pass_layer_arg
        try:
            with collecting_loops(True):
                yield op2.par_loop(*args, **kwargs)
        except MapValueError:
            raise RuntimeError("Integral measure does not match measure of all coefficients/arguments")

    # Must apply bcs outside loop over kernels because we may wish
    # to apply bcs to a block which is otherwise zero, and
    # therefore does not have an associated kernel.
    if bcs is not None and is_mat:
        for bc in bcs:
            if isinstance(bc, DirichletBC):
                fs = bc.function_space()
                # Evaluate this outwith a "collecting_loops" block,
                # since creation of the bc nodes actually can create a
                # par_loop.
                nodes = bc.nodes
                if len(fs) > 1:
                    raise RuntimeError(r"""Cannot apply boundary conditions to full mixed space. Did you forget to index it?""")
                shape = result_matrix.block_shape
                with collecting_loops(True):
                    for i in range(shape[0]):
                        for j in range(shape[1]):
                            # Set diagonal entries on bc nodes to 1 if the current
                            # block is on the matrix diagonal and its index matches the
                            # index of the function space the bc is defined on.
                            if i != j:
                                continue
                            if fs.component is None and fs.index is not None:
                                # Mixed, index (no ComponentFunctionSpace)
                                if fs.index == i:
                                    yield tensor[i, j].set_local_diagonal_entries(nodes)
                            elif fs.component is not None:
                                # ComponentFunctionSpace, check parent index
                                if fs.parent.index is not None:
                                    # Mixed, index doesn't match
                                    if fs.parent.index != i:
                                        continue
                                # Index matches
                                yield tensor[i, j].set_local_diagonal_entries(nodes, idx=fs.component)
                            elif fs.index is None:
                                yield tensor[i, j].set_local_diagonal_entries(nodes)
                            else:
                                raise RuntimeError("Unhandled BC case")
            elif isinstance(bc, EquationBCSplit):
                yield from _assemble(bc.f, tensor=result_matrix, bcs=bc.bcs,
                                     form_compiler_parameters=form_compiler_parameters,
                                     inverse=inverse, mat_type=mat_type,
                                     sub_mat_type=sub_mat_type, appctx=appctx,
                                     assemble_now=assemble_now,
                                     allocate_only=False,
                                     zero_tensor=False)
            else:
                raise NotImplementedError("Undefined type of bcs class provided.")

    if bcs is not None and is_vec:
        for bc in bcs:
            if isinstance(bc, DirichletBC):
                if assemble_now:
                    yield functools.partial(bc.apply, result_function)
                else:
                    yield functools.partial(bc.zero, result_function)
            elif isinstance(bc, EquationBCSplit):
                yield functools.partial(bc.zero, result_function)
                yield from _assemble(bc.f, tensor=result_function, bcs=bc.bcs,
                                     form_compiler_parameters=form_compiler_parameters,
                                     inverse=inverse, mat_type=mat_type,
                                     sub_mat_type=sub_mat_type, appctx=appctx,
                                     assemble_now=assemble_now,
                                     allocate_only=False,
                                     zero_tensor=False)
    if zero_tensor:
        if is_mat:
            # Queue up matrix assembly (after we've done all the other operations)
            yield tensor.assemble()
        if assemble_now:
            yield result
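# Consumption sketch (illustrative; the driver below is a hypothetical
# stand-in for the public assemble() wrapper that iterates this generator):
# every yielded item is a zero-argument callable (the zeroing parloop, the
# collected par_loops, the bc fixups, the final matrix assemble), and with
# assemble_now=True the last yield is `result`, whose call returns the
# assembled object.
#
#   m = None
#   for l in _assemble(a, bcs=bcs, assemble_now=True):
#       m = l()
#   # m now holds the assembled result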
def _make_parloops(expr, tensor, bcs, diagonal, fc_params, assembly_rank):
    """Create parloops for the assembly of the expression.

    :arg expr: The expression to be assembled.
    :arg tensor: The tensor to write to. Depending on ``expr`` and ``diagonal``
        this will either be a scalar (:class:`~pyop2.op2.Global`),
        vector/cofunction (masquerading as a :class:`.Function`) or
        :class:`.Matrix`.
    :arg bcs: Iterable of boundary conditions.
    :arg diagonal: (:class:`bool`) If assembling a matrix is it diagonal?
    :arg fc_params: Dictionary of parameters to pass to the form compiler.
    :arg assembly_rank: The appropriate :class:`_AssemblyRank`.

    :returns: A tuple of the generated :class:`~pyop2.op2.ParLoop` objects.
    """
    if fc_params:
        form_compiler_parameters = fc_params.copy()
    else:
        form_compiler_parameters = {}

    try:
        topology, = set(d.topology for d in expr.ufl_domains())
    except ValueError:
        raise NotImplementedError("All integration domains must share a mesh topology")
    for m in expr.ufl_domains():
        # Ensure mesh is "initialised" (could have got here without
        # building a functionspace (e.g. if integrating a constant)).
        m.init()

    for o in chain(expr.arguments(), expr.coefficients()):
        domain = o.ufl_domain()
        if domain is not None and domain.topology != topology:
            raise NotImplementedError("Assembly with multiple meshes not supported.")

    if assembly_rank == _AssemblyRank.MATRIX:
        test, trial = expr.arguments()
        create_op2arg = functools.partial(_matrix_arg,
                                          all_bcs=tuple(chain(*bcs)),
                                          matrix=tensor,
                                          Vrow=test.function_space(),
                                          Vcol=trial.function_space())
    elif assembly_rank == _AssemblyRank.VECTOR:
        if diagonal:
            # actually a 2-form but throw away the trial space
            test, _ = expr.arguments()
        else:
            test, = expr.arguments()
        create_op2arg = functools.partial(_vector_arg, function=tensor,
                                          V=test.function_space())
    else:
        create_op2arg = tensor

    coefficients = expr.coefficients()
    domains = expr.ufl_domains()

    if isinstance(expr, slate.TensorBase):
        kernels = slac.compile_expression(expr, compiler_parameters=form_compiler_parameters)
    else:
        kernels = tsfc_interface.compile_form(expr, "form", parameters=form_compiler_parameters, diagonal=diagonal)

    # These will be used to correctly interpret the "otherwise" subdomain
    all_integer_subdomain_ids = defaultdict(list)
    for k in kernels:
        if k.kinfo.subdomain_id != "otherwise":
            all_integer_subdomain_ids[k.kinfo.integral_type].append(k.kinfo.subdomain_id)
    for k, v in all_integer_subdomain_ids.items():
        all_integer_subdomain_ids[k] = tuple(sorted(v))

    parloops = []
    for indices, kinfo in kernels:
        kernel = kinfo.kernel
        integral_type = kinfo.integral_type
        domain_number = kinfo.domain_number
        subdomain_id = kinfo.subdomain_id
        coeff_map = kinfo.coefficient_map
        pass_layer_arg = kinfo.pass_layer_arg
        needs_orientations = kinfo.oriented
        needs_cell_facets = kinfo.needs_cell_facets
        needs_cell_sizes = kinfo.needs_cell_sizes

        m = domains[domain_number]
        subdomain_data = expr.subdomain_data()[m]
        # Find argument space indices
        if assembly_rank == _AssemblyRank.MATRIX:
            i, j = indices
        elif assembly_rank == _AssemblyRank.VECTOR:
            i, = indices
        else:
            assert len(indices) == 0

        sdata = subdomain_data.get(integral_type, None)
        if integral_type != 'cell' and sdata is not None:
            raise NotImplementedError("subdomain_data only supported with cell integrals.")

        # Now build arguments for the par_loop
        kwargs = {}
        # Some integrals require non-coefficient arguments at the
        # end (facet number information).
        extra_args = []
        itspace = m.measure_set(integral_type, subdomain_id,
                                all_integer_subdomain_ids)
        if integral_type == "cell":
            itspace = sdata or itspace
            if subdomain_id not in ["otherwise", "everywhere"] and sdata is not None:
                raise ValueError("Cannot use subdomain data and subdomain_id")

            def get_map(x):
                return x.cell_node_map()
        elif integral_type in ("exterior_facet", "exterior_facet_vert"):
            extra_args.append(m.exterior_facets.local_facet_dat(op2.READ))

            def get_map(x):
                return x.exterior_facet_node_map()
        elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
            # In the case of extruded meshes with horizontal facet integrals, two
            # parallel loops will (potentially) get created and called based on the
            # domain id: interior horizontal, bottom or top.
            kwargs["iterate"] = {"exterior_facet_top": op2.ON_TOP,
                                 "exterior_facet_bottom": op2.ON_BOTTOM}[integral_type]

            def get_map(x):
                return x.cell_node_map()
        elif integral_type in ("interior_facet", "interior_facet_vert"):
            extra_args.append(m.interior_facets.local_facet_dat(op2.READ))

            def get_map(x):
                return x.interior_facet_node_map()
        elif integral_type == "interior_facet_horiz":
            kwargs["iterate"] = op2.ON_INTERIOR_FACETS

            def get_map(x):
                return x.cell_node_map()
        else:
            raise ValueError("Unknown integral type '%s'" % integral_type)

        # Output argument
        if assembly_rank == _AssemblyRank.MATRIX:
            tensor_arg = create_op2arg(op2.INC, get_map, i, j)
        elif assembly_rank == _AssemblyRank.VECTOR:
            tensor_arg = create_op2arg(op2.INC, get_map, i)
        else:
            tensor_arg = create_op2arg(op2.INC)

        coords = m.coordinates
        args = [kernel, itspace, tensor_arg,
                coords.dat(op2.READ, get_map(coords))]
        if needs_orientations:
            o = m.cell_orientations()
            args.append(o.dat(op2.READ, get_map(o)))
        if needs_cell_sizes:
            o = m.cell_sizes
            args.append(o.dat(op2.READ, get_map(o)))

        for n, split_map in coeff_map:
            c = coefficients[n]
            split_c = c.split()
            for c_ in (split_c[i] for i in split_map):
                m_ = get_map(c_)
                args.append(c_.dat(op2.READ, m_))

        if needs_cell_facets:
            assert integral_type == "cell"
            extra_args.append(m.cell_to_facets(op2.READ))
        if pass_layer_arg:
            c = op2.Global(1, itspace.layers - 2, dtype=numpy.dtype(numpy.int32))
            o = c(op2.READ)
            extra_args.append(o)

        args.extend(extra_args)
        kwargs["pass_layer_arg"] = pass_layer_arg
        try:
            parloops.append(op2.ParLoop(*args, **kwargs))
        except MapValueError:
            raise RuntimeError("Integral measure does not match measure of all coefficients/arguments")
    return tuple(parloops)
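# Execution sketch (illustrative; the driver below is hypothetical, but
# ParLoop.compute is the standard pyop2 way to run a constructed loop):
# the returned tuple is inert until each ParLoop is computed, so callers
# can build once and re-execute.
#
#   parloops = _make_parloops(form, tensor, bcs=(), diagonal=False,
#                             fc_params=None,
#                             assembly_rank=_AssemblyRank.VECTOR)
#   for parloop in parloops:
#       parloop.compute()   # executes the kernel over its iteration set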
def create_parloops(expr, create_op2arg, *, assembly_rank=None, diagonal=False,
                    form_compiler_parameters=None):
    """Create parallel loops for assembly of expr.

    :arg expr: The expression to assemble.
    :arg create_op2arg: callable that creates the Arg corresponding to
        the output tensor.
    :arg assembly_rank: are we assembling a scalar, vector, or matrix?
    :arg diagonal: For matrices are we actually assembling the diagonal
        into a vector?
    :arg form_compiler_parameters: parameters to pass to the form compiler.

    :returns: a generator of callables (the bound ``compute`` methods of
        the generated op2.ParLoop objects).
    """
    coefficients = expr.coefficients()
    domains = expr.ufl_domains()

    if isinstance(expr, slate.TensorBase):
        if diagonal:
            raise NotImplementedError("Diagonal + slate not supported")
        kernels = slac.compile_expression(expr, tsfc_parameters=form_compiler_parameters)
    else:
        kernels = tsfc_interface.compile_form(expr, "form", parameters=form_compiler_parameters, diagonal=diagonal)

    # These will be used to correctly interpret the "otherwise" subdomain
    all_integer_subdomain_ids = defaultdict(list)
    for k in kernels:
        if k.kinfo.subdomain_id != "otherwise":
            all_integer_subdomain_ids[k.kinfo.integral_type].append(k.kinfo.subdomain_id)
    for k, v in all_integer_subdomain_ids.items():
        all_integer_subdomain_ids[k] = tuple(sorted(v))

    for indices, kinfo in kernels:
        kernel = kinfo.kernel
        integral_type = kinfo.integral_type
        domain_number = kinfo.domain_number
        subdomain_id = kinfo.subdomain_id
        coeff_map = kinfo.coefficient_map
        pass_layer_arg = kinfo.pass_layer_arg
        needs_orientations = kinfo.oriented
        needs_cell_facets = kinfo.needs_cell_facets
        needs_cell_sizes = kinfo.needs_cell_sizes

        m = domains[domain_number]
        subdomain_data = expr.subdomain_data()[m]
        # Find argument space indices
        if assembly_rank == AssemblyRank.MATRIX:
            i, j = indices
        elif assembly_rank == AssemblyRank.VECTOR:
            i, = indices
        else:
            assert len(indices) == 0

        sdata = subdomain_data.get(integral_type, None)
        if integral_type != 'cell' and sdata is not None:
            raise NotImplementedError("subdomain_data only supported with cell integrals.")

        # Now build arguments for the par_loop
        kwargs = {}
        # Some integrals require non-coefficient arguments at the
        # end (facet number information).
        extra_args = []
        itspace = m.measure_set(integral_type, subdomain_id,
                                all_integer_subdomain_ids)
        if integral_type == "cell":
            itspace = sdata or itspace
            if subdomain_id not in ["otherwise", "everywhere"] and sdata is not None:
                raise ValueError("Cannot use subdomain data and subdomain_id")

            def get_map(x):
                return x.cell_node_map()
        elif integral_type in ("exterior_facet", "exterior_facet_vert"):
            extra_args.append(m.exterior_facets.local_facet_dat(op2.READ))

            def get_map(x):
                return x.exterior_facet_node_map()
        elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
            # In the case of extruded meshes with horizontal facet integrals, two
            # parallel loops will (potentially) get created and called based on the
            # domain id: interior horizontal, bottom or top.
            kwargs["iterate"] = {"exterior_facet_top": op2.ON_TOP,
                                 "exterior_facet_bottom": op2.ON_BOTTOM}[integral_type]

            def get_map(x):
                return x.cell_node_map()
        elif integral_type in ("interior_facet", "interior_facet_vert"):
            extra_args.append(m.interior_facets.local_facet_dat(op2.READ))

            def get_map(x):
                return x.interior_facet_node_map()
        elif integral_type == "interior_facet_horiz":
            kwargs["iterate"] = op2.ON_INTERIOR_FACETS

            def get_map(x):
                return x.cell_node_map()
        else:
            raise ValueError("Unknown integral type '%s'" % integral_type)

        # Output argument
        if assembly_rank == AssemblyRank.MATRIX:
            tensor_arg = create_op2arg(op2.INC, get_map, i, j)
        elif assembly_rank == AssemblyRank.VECTOR:
            tensor_arg = create_op2arg(op2.INC, get_map, i)
        else:
            tensor_arg = create_op2arg(op2.INC)

        coords = m.coordinates
        args = [kernel, itspace, tensor_arg,
                coords.dat(op2.READ, get_map(coords))]
        if needs_orientations:
            o = m.cell_orientations()
            args.append(o.dat(op2.READ, get_map(o)))
        if needs_cell_sizes:
            o = m.cell_sizes
            args.append(o.dat(op2.READ, get_map(o)))

        for n in coeff_map:
            c = coefficients[n]
            for c_ in c.split():
                m_ = get_map(c_)
                args.append(c_.dat(op2.READ, m_))

        if needs_cell_facets:
            assert integral_type == "cell"
            extra_args.append(m.cell_to_facets(op2.READ))
        if pass_layer_arg:
            c = op2.Global(1, itspace.layers - 2, dtype=numpy.dtype(numpy.int32))
            o = c(op2.READ)
            extra_args.append(o)

        args.extend(extra_args)
        kwargs["pass_layer_arg"] = pass_layer_arg
        try:
            yield op2.ParLoop(*args, **kwargs).compute
        except MapValueError:
            raise RuntimeError("Integral measure does not match measure of all coefficients/arguments")
def _assemble(f, tensor=None, bcs=None, form_compiler_parameters=None,
              inverse=False, mat_type=None, sub_mat_type=None,
              appctx={}, collect_loops=False, allocate_only=False):
    r"""Assemble the form f and return a Firedrake object representing the
    result. This will be a :class:`float` for 0-forms, a :class:`.Function`
    for 1-forms and a :class:`.Matrix` for 2-forms.

    :arg bcs: A tuple of :class:`.DirichletBC`\s to be applied.
    :arg tensor: An existing tensor object into which the form should be
        assembled. If this is not supplied, a new tensor will be created
        for the purpose.
    :arg form_compiler_parameters: (optional) dict of parameters to pass
        to the form compiler.
    :arg inverse: (optional) if f is a 2-form, then assemble the inverse
        of the local matrices.
    :arg mat_type: (optional) type for assembled matrices, one of
        "nest", "aij", "baij", or "matfree".
    :arg sub_mat_type: (optional) type for assembled sub matrices inside
        a "nest" matrix.  One of "aij" or "baij".
    :arg appctx: Additional information to hang on the assembled
        matrix if an implicit matrix is requested (mat_type "matfree").
    :arg collect_loops: (optional) if True, return the list of assembly
        loops instead of executing them (used internally by residual and
        Jacobian assembly).
    :arg allocate_only: (optional) if True, return the newly allocated
        (but unassembled) matrix.
    """
    if mat_type is None:
        mat_type = parameters.parameters["default_matrix_type"]
    if mat_type not in ["matfree", "aij", "baij", "nest"]:
        raise ValueError("Unrecognised matrix type, '%s'" % mat_type)
    if sub_mat_type is None:
        sub_mat_type = parameters.parameters["default_sub_matrix_type"]
    if sub_mat_type not in ["aij", "baij"]:
        raise ValueError("Invalid submatrix type, '%s' (not 'aij' or 'baij')" % sub_mat_type)

    if form_compiler_parameters:
        form_compiler_parameters = form_compiler_parameters.copy()
    else:
        form_compiler_parameters = {}
    form_compiler_parameters["assemble_inverse"] = inverse

    if isinstance(f, slate.TensorBase):
        kernels = slac.compile_expression(f, tsfc_parameters=form_compiler_parameters)
        integral_types = [kernel.kinfo.integral_type for kernel in kernels]
    else:
        kernels = tsfc_interface.compile_form(f, "form", parameters=form_compiler_parameters, inverse=inverse)
        integral_types = [integral.integral_type() for integral in f.integrals()]

    rank = len(f.arguments())
    is_mat = rank == 2
    is_vec = rank == 1

    if any((coeff.function_space() and coeff.function_space().component is not None)
           for coeff in f.coefficients()):
        raise NotImplementedError("Integration of subscripted VFS not yet implemented")

    if inverse and rank != 2:
        raise ValueError("Can only assemble the inverse of a 2-form")

    zero_tensor = lambda: None

    if is_mat:
        matfree = mat_type == "matfree"
        nest = mat_type == "nest"
        if nest:
            baij = sub_mat_type == "baij"
        else:
            baij = mat_type == "baij"
        if matfree:  # intercept matrix-free matrices here
            if inverse:
                raise NotImplementedError("Inverse not implemented with matfree")
            if collect_loops:
                raise NotImplementedError("Can't collect loops with matfree")
            if tensor is None:
                return matrix.ImplicitMatrix(f, bcs,
                                             fc_params=form_compiler_parameters,
                                             appctx=appctx)
            if not isinstance(tensor, matrix.ImplicitMatrix):
                raise ValueError("Expecting implicit matrix with matfree")
            tensor.assemble()
            return tensor
        test, trial = f.arguments()

        map_pairs = []
        cell_domains = []
        exterior_facet_domains = []
        interior_facet_domains = []
        if tensor is None:
            # For horizontal facets of extruded meshes, the corresponding domain
            # in the base mesh is the cell domain. Hence all the maps used for top
            # bottom and interior horizontal facets will use the cell to dofs map
            # coming from the base mesh as a starting point for the actual dynamic map
            # computation.
            for integral_type in integral_types:
                if integral_type == "cell":
                    cell_domains.append(op2.ALL)
                elif integral_type == "exterior_facet":
                    exterior_facet_domains.append(op2.ALL)
                elif integral_type == "interior_facet":
                    interior_facet_domains.append(op2.ALL)
                elif integral_type == "exterior_facet_bottom":
                    cell_domains.append(op2.ON_BOTTOM)
                elif integral_type == "exterior_facet_top":
                    cell_domains.append(op2.ON_TOP)
                elif integral_type == "exterior_facet_vert":
                    exterior_facet_domains.append(op2.ALL)
                elif integral_type == "interior_facet_horiz":
                    cell_domains.append(op2.ON_INTERIOR_FACETS)
                elif integral_type == "interior_facet_vert":
                    interior_facet_domains.append(op2.ALL)
                else:
                    raise ValueError('Unknown integral type "%s"' % integral_type)

            # To avoid an extra check for extruded domains, the maps that are being passed in
            # are DecoratedMaps. For the non-extruded case the DecoratedMaps don't restrict the
            # space over which we iterate as the domains are dropped at Sparsity construction
            # time. In the extruded case the cell domains are used to identify the regions of the
            # mesh which require allocation in the sparsity.
            if cell_domains:
                map_pairs.append((op2.DecoratedMap(test.cell_node_map(), cell_domains),
                                  op2.DecoratedMap(trial.cell_node_map(), cell_domains)))
            if exterior_facet_domains:
                map_pairs.append((op2.DecoratedMap(test.exterior_facet_node_map(), exterior_facet_domains),
                                  op2.DecoratedMap(trial.exterior_facet_node_map(), exterior_facet_domains)))
            if interior_facet_domains:
                map_pairs.append((op2.DecoratedMap(test.interior_facet_node_map(), interior_facet_domains),
                                  op2.DecoratedMap(trial.interior_facet_node_map(), interior_facet_domains)))

            map_pairs = tuple(map_pairs)
            # Construct OP2 Mat to assemble into
            fs_names = (test.function_space().name, trial.function_space().name)
            sparsity = op2.Sparsity((test.function_space().dof_dset,
                                     trial.function_space().dof_dset),
                                    map_pairs,
                                    "%s_%s_sparsity" % fs_names,
                                    nest=nest,
                                    block_sparse=baij)
            result_matrix = matrix.Matrix(f, bcs, sparsity, numpy.float64,
                                          "%s_%s_matrix" % fs_names)
            tensor = result_matrix._M
        else:
            if isinstance(tensor, matrix.ImplicitMatrix):
                raise ValueError("Expecting matfree with implicit matrix")

            result_matrix = tensor
            # Replace any bcs on the tensor we passed in
            result_matrix.bcs = bcs
            tensor = tensor._M
            zero_tensor = tensor.zero

        if result_matrix.block_shape != (1, 1) and mat_type == "baij":
            raise ValueError("BAIJ matrix type makes no sense for mixed spaces, use 'aij'")

        def mat(testmap, trialmap, i, j):
            return tensor[i, j](op2.INC,
                                (testmap(test.function_space()[i])[op2.i[0]],
                                 trialmap(trial.function_space()[j])[op2.i[1]]))

        result = lambda: result_matrix
        if allocate_only:
            result_matrix._assembly_callback = None
            return result_matrix
    elif is_vec:
        test = f.arguments()[0]
        if tensor is None:
            result_function = function.Function(test.function_space())
            tensor = result_function.dat
        else:
            result_function = tensor
            tensor = result_function.dat
        zero_tensor = tensor.zero

        def vec(testmap, i):
            return tensor[i](op2.INC,
                             testmap(test.function_space()[i])[op2.i[0]])
        result = lambda: result_function
    else:
        # 0-forms are always scalar
        if tensor is None:
            tensor = op2.Global(1, [0.0])
        else:
            raise ValueError("Can't assemble 0-form into existing tensor")
        result = lambda: tensor.data[0]

    coefficients = f.coefficients()
    domains = f.ufl_domains()
    for m in domains:
        # Ensure mesh is "initialised" (could have got here without
        # building a functionspace (e.g. if integrating a constant)).
        m.init()
        if m.topology != domains[0].topology:
            raise NotImplementedError("All integration domains must share a mesh topology.")

    # These will be used to correctly interpret the "otherwise" subdomain
    all_integer_subdomain_ids = defaultdict(list)
    for k in kernels:
        if k.kinfo.subdomain_id != "otherwise":
            all_integer_subdomain_ids[k.kinfo.integral_type].append(k.kinfo.subdomain_id)
    for k, v in all_integer_subdomain_ids.items():
        all_integer_subdomain_ids[k] = tuple(sorted(v))

    # Since applying boundary conditions to a matrix changes the
    # initial assembly, to support:
    #     A = assemble(a)
    #     bc.apply(A)
    #     solve(A, ...)
    # we need to defer actually assembling the matrix until just
    # before we need it (when we know if there are any bcs to be
    # applied).  To do so, we build a closure that carries out the
    # assembly and stash that on the Matrix object.  When we hit a
    # solve, we funcall the closure with any bcs the Matrix now has to
    # assemble it.

    # In collecting loops mode, we collect the loops, and assume the
    # boundary conditions provided are the ones we want.  It therefore
    # is only used inside residual and jacobian assembly.
    loops = []

    def thunk(bcs):
        if collect_loops:
            loops.append(zero_tensor)
        else:
            zero_tensor()
        for indices, (kernel, integral_type, needs_orientations, subdomain_id,
                      domain_number, coeff_map, needs_cell_facets) in kernels:
            m = domains[domain_number]
            subdomain_data = f.subdomain_data()[m]
            # Find argument space indices
            if is_mat:
                i, j = indices
            elif is_vec:
                i, = indices
            else:
                assert len(indices) == 0

            sdata = subdomain_data.get(integral_type, None)
            if integral_type != 'cell' and sdata is not None:
                raise NotImplementedError("subdomain_data only supported with cell integrals.")

            # Extract block from tensor and test/trial spaces
            # FIXME Ugly variable renaming required because functions are not
            # lexical closures in Python and we're writing to these variables
            if is_mat and result_matrix.block_shape > (1, 1):
                tsbc = []
                trbc = []
                # Unwind ComponentFunctionSpace to check for matching BCs
                for bc in bcs:
                    fs = bc.function_space()
                    if fs.component is not None:
                        fs = fs.parent
                    if fs.index == i:
                        tsbc.append(bc)
                    if fs.index == j:
                        trbc.append(bc)
            elif is_mat:
                tsbc, trbc = bcs, bcs

            # Now build arguments for the par_loop
            kwargs = {}
            # Some integrals require non-coefficient arguments at the
            # end (facet number information).
            extra_args = []
            # Decoration for applying to matrix maps in extruded case
            decoration = None
            itspace = m.measure_set(integral_type, subdomain_id,
                                    all_integer_subdomain_ids)
            if integral_type == "cell":
                itspace = sdata or itspace
                if subdomain_id not in ["otherwise", "everywhere"] and sdata is not None:
                    raise ValueError("Cannot use subdomain data and subdomain_id")

                def get_map(x, bcs=None, decoration=None):
                    return x.cell_node_map(bcs)
            elif integral_type in ("exterior_facet", "exterior_facet_vert"):
                extra_args.append(m.exterior_facets.local_facet_dat(op2.READ))

                def get_map(x, bcs=None, decoration=None):
                    return x.exterior_facet_node_map(bcs)
            elif integral_type in ("exterior_facet_top", "exterior_facet_bottom"):
                # In the case of extruded meshes with horizontal facet integrals, two
                # parallel loops will (potentially) get created and called based on the
                # domain id: interior horizontal, bottom or top.
                decoration = {"exterior_facet_top": op2.ON_TOP,
                              "exterior_facet_bottom": op2.ON_BOTTOM}[integral_type]
                kwargs["iterate"] = decoration

                def get_map(x, bcs=None, decoration=None):
                    map_ = x.cell_node_map(bcs)
                    if decoration is not None:
                        return op2.DecoratedMap(map_, decoration)
                    return map_
            elif integral_type in ("interior_facet", "interior_facet_vert"):
                extra_args.append(m.interior_facets.local_facet_dat(op2.READ))

                def get_map(x, bcs=None, decoration=None):
                    return x.interior_facet_node_map(bcs)
            elif integral_type == "interior_facet_horiz":
                decoration = op2.ON_INTERIOR_FACETS
                kwargs["iterate"] = decoration

                def get_map(x, bcs=None, decoration=None):
                    map_ = x.cell_node_map(bcs)
                    if decoration is not None:
                        return op2.DecoratedMap(map_, decoration)
                    return map_
            else:
                raise ValueError("Unknown integral type '%s'" % integral_type)

            # Output argument
            if is_mat:
                tensor_arg = mat(lambda s: get_map(s, tsbc, decoration),
                                 lambda s: get_map(s, trbc, decoration),
                                 i, j)
            elif is_vec:
                tensor_arg = vec(lambda s: get_map(s), i)
            else:
                tensor_arg = tensor(op2.INC)

            coords = m.coordinates
            args = [kernel, itspace, tensor_arg,
                    coords.dat(op2.READ, get_map(coords))]
            if needs_orientations:
                o = m.cell_orientations()
                args.append(o.dat(op2.READ, get_map(o)))
            for n in coeff_map:
                c = coefficients[n]
                for c_ in c.split():
                    args.append(c_.dat(op2.READ, get_map(c_)))
            if needs_cell_facets:
                assert integral_type == "cell"
                extra_args.append(m.cell_to_facet_map(op2.READ))

            args.extend(extra_args)
            try:
                with collecting_loops(collect_loops):
                    loops.append(op2.par_loop(*args, **kwargs))
            except MapValueError:
                raise RuntimeError("Integral measure does not match measure of all coefficients/arguments")

        # Must apply bcs outside loop over kernels because we may wish
        # to apply bcs to a block which is otherwise zero, and
        # therefore does not have an associated kernel.
        if bcs is not None and is_mat:
            for bc in bcs:
                fs = bc.function_space()
                # Evaluate this outwith a "collecting_loops" block,
                # since creation of the bc nodes actually can create a
                # par_loop.
                nodes = bc.nodes
                if len(fs) > 1:
                    raise RuntimeError("""Cannot apply boundary conditions to full mixed space. Did you forget to index it?""")
                shape = result_matrix.block_shape
                with collecting_loops(collect_loops):
                    for i in range(shape[0]):
                        for j in range(shape[1]):
                            # Set diagonal entries on bc nodes to 1 if the current
                            # block is on the matrix diagonal and its index matches the
                            # index of the function space the bc is defined on.
                            if i != j:
                                continue
                            if fs.component is None and fs.index is not None:
                                # Mixed, index (no ComponentFunctionSpace)
                                if fs.index == i:
                                    loops.append(tensor[i, j].set_local_diagonal_entries(nodes))
                            elif fs.component is not None:
                                # ComponentFunctionSpace, check parent index
                                if fs.parent.index is not None:
                                    # Mixed, index doesn't match
                                    if fs.parent.index != i:
                                        continue
                                # Index matches
                                loops.append(tensor[i, j].set_local_diagonal_entries(nodes, idx=fs.component))
                            elif fs.index is None:
                                loops.append(tensor[i, j].set_local_diagonal_entries(nodes))
                            else:
                                raise RuntimeError("Unhandled BC case")
        if bcs is not None and is_vec:
            if len(bcs) > 0 and collect_loops:
                raise NotImplementedError("Loop collection not handled in this case")
            for bc in bcs:
                bc.apply(result_function)
        if is_mat:
            # Queue up matrix assembly (after we've done all the other operations)
            loops.append(tensor.assemble())
        return result()

    if collect_loops:
        thunk(bcs)
        return loops

    if is_mat:
        result_matrix._assembly_callback = thunk
        return result()
    else:
        return thunk(bcs)
def dg_injection_kernel(Vf, Vc, ncell):
    from firedrake import Tensor, AssembledVector, TestFunction, TrialFunction
    from firedrake.slate.slac import compile_expression
    macro_builder = MacroKernelBuilder(ncell)
    f = ufl.Coefficient(Vf)
    macro_builder.set_coefficients([f])
    macro_builder.set_coordinates(Vf.mesh())

    Vfe = create_element(Vf.ufl_element())
    macro_quadrature_rule = make_quadrature(
        Vfe.cell, estimate_total_polynomial_degree(ufl.inner(f, f)))
    index_cache = {}
    parameters = default_parameters()
    integration_dim, entity_ids = lower_integral_type(Vfe.cell, "cell")
    macro_cfg = dict(interface=macro_builder,
                     ufl_cell=Vf.ufl_cell(),
                     precision=parameters["precision"],
                     integration_dim=integration_dim,
                     entity_ids=entity_ids,
                     index_cache=index_cache,
                     quadrature_rule=macro_quadrature_rule)

    fexpr, = fem.compile_ufl(f, **macro_cfg)
    X = ufl.SpatialCoordinate(Vf.mesh())
    C_a, = fem.compile_ufl(X, **macro_cfg)
    detJ = ufl_utils.preprocess_expression(abs(ufl.JacobianDeterminant(f.ufl_domain())))
    macro_detJ, = fem.compile_ufl(detJ, **macro_cfg)

    Vce = create_element(Vc.ufl_element())

    coarse_builder = firedrake_interface.KernelBuilder("cell", "otherwise", 0)
    coarse_builder.set_coordinates(Vc.mesh())
    argument_multiindices = (Vce.get_indices(), )
    argument_multiindex, = argument_multiindices
    return_variable, = coarse_builder.set_arguments((ufl.TestFunction(Vc), ),
                                                    argument_multiindices)

    integration_dim, entity_ids = lower_integral_type(Vce.cell, "cell")
    # Midpoint quadrature for jacobian on coarse cell.
    quadrature_rule = make_quadrature(Vce.cell, 0)
    coarse_cfg = dict(interface=coarse_builder,
                      ufl_cell=Vc.ufl_cell(),
                      precision=parameters["precision"],
                      integration_dim=integration_dim,
                      entity_ids=entity_ids,
                      index_cache=index_cache,
                      quadrature_rule=quadrature_rule)

    X = ufl.SpatialCoordinate(Vc.mesh())
    K = ufl_utils.preprocess_expression(ufl.JacobianInverse(Vc.mesh()))
    C_0, = fem.compile_ufl(X, **coarse_cfg)
    K, = fem.compile_ufl(K, **coarse_cfg)

    i = gem.Index()
    j = gem.Index()

    C_0 = gem.Indexed(C_0, (j, ))
    C_0 = gem.index_sum(C_0, quadrature_rule.point_set.indices)
    C_a = gem.Indexed(C_a, (j, ))
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), C_a))

    K_ij = gem.Indexed(K, (i, j))
    K_ij = gem.index_sum(K_ij, quadrature_rule.point_set.indices)
    X_a = gem.index_sum(gem.Product(K_ij, X_a), (j, ))
    C_0, = quadrature_rule.point_set.points
    C_0 = gem.Indexed(gem.Literal(C_0), (i, ))
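    # What the gem manipulations above and below compute (a sketch): for an
    # affine coarse cell with inverse Jacobian K, a physical point x has
    # coarse reference coordinate
    #     X(x) = X_mid + K * (x - x_mid),
    # where x_mid / X_mid are the physical / reference midpoints given by
    # the single point of the degree-0 quadrature rule.  C_a holds the
    # physical fine quadrature points, so X_a below is their image in the
    # coarse reference cell.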
    # fine quad points in coarse reference space.
    X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), X_a))
    X_a = gem.ComponentTensor(X_a, (i, ))

    # Coarse basis function evaluated at fine quadrature points
    phi_c = fem.fiat_to_ufl(Vce.point_evaluation(0, X_a, (Vce.cell.get_dimension(), 0)), 0)

    tensor_indices = tuple(gem.Index(extent=d) for d in f.ufl_shape)

    phi_c = gem.Indexed(phi_c, argument_multiindex + tensor_indices)
    fexpr = gem.Indexed(fexpr, tensor_indices)
    quadrature_weight = macro_quadrature_rule.weight_expression
    expr = gem.Product(gem.IndexSum(gem.Product(phi_c, fexpr), tensor_indices),
                       gem.Product(macro_detJ, quadrature_weight))

    quadrature_indices = macro_builder.indices + macro_quadrature_rule.point_set.indices
    reps = spectral.Integrals([expr], quadrature_indices, argument_multiindices, parameters)
    assignments = spectral.flatten([(return_variable, reps)], index_cache)
    return_variables, expressions = zip(*assignments)
    expressions = impero_utils.preprocess_gem(expressions, **spectral.finalise_options)
    assignments = list(zip(return_variables, expressions))
    impero_c = impero_utils.compile_gem(assignments,
                                        quadrature_indices + argument_multiindex,
                                        remove_zeros=True)

    index_names = []

    def name_index(index, name):
        index_names.append((index, name))
        if index in index_cache:
            for multiindex, suffix in zip(index_cache[index],
                                          string.ascii_lowercase):
                name_multiindex(multiindex, name + suffix)

    def name_multiindex(multiindex, name):
        if len(multiindex) == 1:
            name_index(multiindex[0], name)
        else:
            for i, index in enumerate(multiindex):
                name_index(index, name + str(i))

    name_multiindex(quadrature_indices, 'ip')
    for multiindex, name in zip(argument_multiindices, ['j', 'k']):
        name_multiindex(multiindex, name)

    index_names.extend(zip(macro_builder.indices, ["entity"]))
    body = generate_coffee(impero_c, index_names, parameters["precision"])

    retarg = ast.Decl(SCALAR_TYPE, ast.Symbol("R", rank=(Vce.space_dimension(), )))
    local_tensor = coarse_builder.local_tensor
    local_tensor.init = ast.ArrayInit(numpy.zeros(Vce.space_dimension(), dtype=SCALAR_TYPE))
    body.children.insert(0, local_tensor)
    args = [retarg] + macro_builder.kernel_args + [macro_builder.coordinates_arg,
                                                   coarse_builder.coordinates_arg]

    # Now we have the kernel that computes <f, phi_c>dx_c.
    # So now we need to hit it with the inverse mass matrix on dx_c.
    u = TrialFunction(Vc)
    v = TestFunction(Vc)
    expr = Tensor(ufl.inner(u, v)*ufl.dx).inv * AssembledVector(ufl.Coefficient(Vc))
    Ainv, = compile_expression(expr)
    Ainv = Ainv.kinfo.kernel
    A = ast.Symbol(local_tensor.sym.symbol)
    R = ast.Symbol("R")
    body.children.append(ast.FunCall(Ainv.name, R, coarse_builder.coordinates_arg.sym, A))
    from coffee.base import Node
    assert isinstance(Ainv._code, Node)
    return op2.Kernel(ast.Node([Ainv._code,
                                ast.FunDecl("void", "pyop2_kernel_injection_dg",
                                            args, body,
                                            pred=["static", "inline"])]),
                      name="pyop2_kernel_injection_dg",
                      cpp=True,
                      include_dirs=Ainv._include_dirs,
                      headers=Ainv._headers)
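
# Summary of what the generated kernel computes (a sketch of the maths, not
# extra behaviour): for a coarse cell subdivided into ncell fine cells it
# first evaluates
#     b_i = sum over fine cells of sum_q w_q * |detJ| * phi_i(X(x_q)) * f(x_q)
# using the macro quadrature rule, then applies the inverse coarse mass
# matrix via the Slate-generated Ainv kernel, so that R = M_c^{-1} b is the
# local L2 projection (DG injection) of the fine function f into the coarse
# space.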