def _expression_mathfunction(expr, parameters): name_map = { 'abs': 'fabs', 'ln': 'log', # Bessel functions 'cyl_bessel_j': 'jn', 'cyl_bessel_y': 'yn', # Modified Bessel functions (C++ only) # # These mappings work for FEniCS only, and fail with Firedrake # since no Boost available. 'cyl_bessel_i': 'boost::math::cyl_bessel_i', 'cyl_bessel_k': 'boost::math::cyl_bessel_k', } name = name_map.get(expr.name, expr.name) if name == 'jn': nu, arg = expr.children if nu == gem.Zero(): return coffee.FunCall('j0', expression(arg, parameters)) elif nu == gem.one: return coffee.FunCall('j1', expression(arg, parameters)) if name == 'yn': nu, arg = expr.children if nu == gem.Zero(): return coffee.FunCall('y0', expression(arg, parameters)) elif nu == gem.one: return coffee.FunCall('y1', expression(arg, parameters)) return coffee.FunCall(name, *[expression(c, parameters) for c in expr.children])
def extruded_int_horiz_facet(exp, builder, top_sks, bottom_sks, coordsym, mesh_layer_sym, cell_orientations): """Generates a code statement for evaluating interior horizontal facet integrals. :arg exp: A :class:`TensorBase` expression. :arg builder: A :class:`KernelBuilder` containing the expression context. :arg top_sks: An iterable of index ordered TSFC kernels for the top kernels. :arg bottom_sks: An iterable of index ordered TSFC kernels for the bottom kernels. :arg coordsym: An `ast.Symbol` object representing coordinate arguments for the kernel. :arg mesh_layer_sym: An `ast.Symbol` representing the mesh layer. :arg cell_orientations: An `ast.Symbol` representing cell orientation information. Returns: A COFFEE code statement and updated include_dirs """ t = builder.temps[exp] nlayers = exp.ufl_domain().topological.layers - 1 incl = [] top_calls = [] bottom_calls = [] for top, btm in zip(top_sks, bottom_sks): assert top.indices == btm.indices, ( "Top and bottom kernels must have the same indices" ) index = top.indices # Generate an iterable of coefficients to pass to the subkernel # if any are required c_set = top.kinfo.coefficient_map + btm.kinfo.coefficient_map coefficient_map = tuple(OrderedDict.fromkeys(c_set)) clist = [c for ci in coefficient_map for c in builder.coefficient(exp.coefficients()[ci])] # TODO: Is this safe? if top.kinfo.oriented and btm.kinfo.oriented: clist.append(cell_orientations) dirs = top.kinfo.kernel._include_dirs + btm.kinfo.kernel._include_dirs incl.extend(tuple(OrderedDict.fromkeys(dirs))) tensor = eigen_tensor(exp, t, index) top_calls.append(ast.FunCall(top.kinfo.kernel.name, tensor, coordsym, *clist)) bottom_calls.append(ast.FunCall(btm.kinfo.kernel.name, tensor, coordsym, *clist)) else_stmt = ast.Block(top_calls + bottom_calls, open_scope=True) inter_stmt = ast.If(ast.Eq(mesh_layer_sym, nlayers - 1), (ast.Block(bottom_calls, open_scope=True), else_stmt)) stmt = ast.If(ast.Eq(mesh_layer_sym, 0), (ast.Block(top_calls, open_scope=True), inter_stmt)) return stmt, incl
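# Illustrative only: the nested ast.If assembled above is expected to render to C
# control flow of roughly this shape (symbol names follow this module):
#
#     if (layer == 0)                { /* top kernel calls */ }
#     else if (layer == nlayers - 1) { /* bottom kernel calls */ }
#     else                           { /* top and bottom kernel calls */ }
#
# i.e. the top-facet kernels fire on the first layer, the bottom-facet kernels on the
# last layer, and both sets of kernels on every layer in between.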
def _expression_power(expr, parameters): base, exponent = expr.children if parameters.scalar_type == 'double complex': return coffee.FunCall("cpow", expression(base, parameters), expression(exponent, parameters)) else: return coffee.FunCall("pow", expression(base, parameters), expression(exponent, parameters))
def _expression_mathfunction(expr, parameters): name_map = { 'abs': 'fabs', 'ln': 'log', # Bessel functions 'cyl_bessel_j': 'jn', 'cyl_bessel_y': 'yn', # Modified Bessel functions (C++ only) # # These mappings work for FEniCS only, and fail with Firedrake # since no Boost available. 'cyl_bessel_i': 'boost::math::cyl_bessel_i', 'cyl_bessel_k': 'boost::math::cyl_bessel_k', } complex_name_map = { 'ln': 'clog', 'conj': 'conj' # TODO: Are there different complex Bessel Functions? } if parameters.scalar_type == 'double complex': name = complex_name_map.get(expr.name, expr.name) if name in { 'sin', 'cos', 'tan', 'sqrt', 'exp', 'abs', 'sinh', 'cosh', 'tanh', 'acos', 'asin', 'atan', 'real', 'imag' }: name = 'c' + expr.name else: name = name_map.get(expr.name, expr.name) if name == 'jn': nu, arg = expr.children if nu == gem.Zero(): return coffee.FunCall('j0', expression(arg, parameters)) elif nu == gem.one: return coffee.FunCall('j1', expression(arg, parameters)) if name == 'yn': nu, arg = expr.children if nu == gem.Zero(): return coffee.FunCall('y0', expression(arg, parameters)) elif nu == gem.one: return coffee.FunCall('y1', expression(arg, parameters)) return coffee.FunCall(name, *[expression(c, parameters) for c in expr.children])
def visit_rhs(node): """Create a PyOP2 AST-conformed object starting from a FFC node. """ if isinstance(node, Expression): return node(*[visit_rhs(a) for a in node.args]) def create_pyop2_node(typ, exp1, exp2): """Create an expr node starting from two FFC symbols.""" if typ == 2: return pyop2.Prod(exp1, exp2) if typ == 3: return pyop2.Sum(exp1, exp2) if typ == 4: return pyop2.Div(exp1, exp2) def create_nested_pyop2_node(typ, nodes): """Create a subtree for the PyOP2 AST from a generic FFC expr. """ if len(nodes) == 2: return create_pyop2_node(typ, nodes[0], nodes[1]) else: return create_pyop2_node(typ, nodes[0], \ create_nested_pyop2_node(typ, nodes[1:])) if node._prec == 0: # Float return pyop2.Symbol(node.val, ()) if node._prec == 1: # Symbol rank, offset = [], [] for i in node.loop_index: if hasattr(i, 'offset') and hasattr(i, 'loop_index'): rank.append(i.loop_index) offset.append((1, i.offset)) else: rank.append(i) offset.append((1, 0)) return pyop2.Symbol(node.ide, tuple(rank), tuple(offset)) if node._prec in [2, 3] and len(node.vrs) == 1: # "Fake" Product, "Fake" Sum return pyop2.Par(visit_rhs(node.vrs[0])) if node._prec == 5: # Function call return pyop2.FunCall(node.funname, *[visit_rhs(n) for n in node.vrs]) children = [] if node._prec == 4: # Fraction children = [visit_rhs(node.num), visit_rhs(node.denom)] else: # Product, Sum children = [visit_rhs(n) for n in reversed(node.vrs)] # PyOP2's ast expr are binary, so we deal with this here return pyop2.Par(create_nested_pyop2_node(node._prec, children))
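# Sketch only: create_pyop2_node/create_nested_pyop2_node are local to visit_rhs, but
# the folding they perform on an n-ary FFC node is easy to illustrate. For a sum
# (typ == 3) with operands a, b, c the recursion builds a right-nested binary tree:
#
#     create_nested_pyop2_node(3, [a, b, c])  # -> pyop2.Sum(a, pyop2.Sum(b, c))
#
# which visit_rhs then wraps in pyop2.Par to preserve precedence in the generated code.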
def extruded_top_bottom_facet(cxt_kernel, builder, coordsym, mesh_layer_sym, cell_orientations): """Generates a code statement for evaluating exterior top/bottom facet integrals. :arg cxt_kernel: A :namedtuple:`ContextKernel` containing all relevant integral types and TSFC kernels associated with the form nested in the expression. :arg builder: A :class:`KernelBuilder` containing the expression context. :arg coordsym: An `ast.Symbol` object representing coordinate arguments for the kernel. :arg mesh_layer_sym: An `ast.Symbol` representing the mesh layer. :arg cell_orientations: An `ast.Symbol` representing cell orientation information. Returns: A COFFEE code statement and updated include_dirs """ exp = cxt_kernel.tensor t = builder.temps[exp] nlayers = exp.ufl_domain().topological.layers - 1 incl = [] body = [] for splitkernel in cxt_kernel.tsfc_kernels: index = splitkernel.indices kinfo = splitkernel.kinfo # Generate an iterable of coefficients to pass to the subkernel # if any are required clist = [ c for ci in kinfo.coefficient_map for c in builder.coefficient(exp.coefficients()[ci]) ] if kinfo.oriented: clist.insert(0, cell_orientations) incl.extend(kinfo.kernel._include_dirs) tensor = eigen_tensor(exp, t, index) body.append(ast.FunCall(kinfo.kernel.name, tensor, coordsym, *clist)) if cxt_kernel.original_integral_type == "exterior_facet_bottom": layer = 0 else: layer = nlayers - 1 stmt = ast.If(ast.Eq(mesh_layer_sym, layer), [ast.Block(body, open_scope=True)]) return stmt, incl
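# Illustrative only: the ast.If built above renders to a single layer guard, e.g.
#
#     if (layer == 0)           { /* kernel calls */ }   /* exterior_facet_bottom */
#     if (layer == nlayers - 1) { /* kernel calls */ }   /* exterior_facet_top    */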
def _expression_mathfunction(expr, parameters): complex_mode = int(is_complex(parameters.scalar_type)) # Bessel functions if expr.name.startswith('cyl_bessel_'): if complex_mode: msg = "Bessel functions for complex numbers: missing implementation" raise NotImplementedError(msg) nu, arg = expr.children nu_thunk = lambda: expression(nu, parameters) arg_coffee = expression(arg, parameters) if expr.name == 'cyl_bessel_j': if nu == gem.Zero(): return coffee.FunCall('j0', arg_coffee) elif nu == gem.one: return coffee.FunCall('j1', arg_coffee) else: return coffee.FunCall('jn', nu_thunk(), arg_coffee) if expr.name == 'cyl_bessel_y': if nu == gem.Zero(): return coffee.FunCall('y0', arg_coffee) elif nu == gem.one: return coffee.FunCall('y1', arg_coffee) else: return coffee.FunCall('yn', nu_thunk(), arg_coffee) # Modified Bessel functions (C++ only) # # These mappings work for FEniCS only, and fail with Firedrake # since no Boost available. if expr.name in ['cyl_bessel_i', 'cyl_bessel_k']: name = 'boost::math::' + expr.name return coffee.FunCall(name, nu_thunk(), arg_coffee) assert False, "Unknown Bessel function: {}".format(expr.name) # Other math functions name = math_table[expr.name][complex_mode] if name is None: raise RuntimeError("{} not supported in complex mode".format( expr.name)) return coffee.FunCall(name, *[expression(c, parameters) for c in expr.children])
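# math_table is defined elsewhere in this module. A minimal sketch of the shape this
# code assumes: each entry pairs the real-mode C name with the complex-mode C name,
# with None marking functions that have no complex counterpart. The entries below are
# illustrative examples, not the authoritative table.
#
#     math_table = {
#         'sqrt': ('sqrt', 'csqrt'),
#         'abs':  ('fabs', 'cabs'),
#         'ln':   ('log',  'clog'),
#         'erf':  ('erf',  None),   # rejected above with "not supported in complex mode"
#     }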
into a new :class:`.Function`.""" result = function.Function(ExpressionWalker().walk(expr)[2]) evaluate_expression(Assign(result, expr), subset) return result _to_sum = lambda o: ast.Sum(_ast(o[0]), _to_sum(o[1:])) if len(o) > 1 else _ast(o[0]) _to_prod = lambda o: ast.Prod(_ast(o[0]), _to_prod(o[1:])) if len(o) > 1 else _ast(o[0]) _to_aug_assign = lambda op, o: op(_ast(o[0]), _ast(o[1])) _ast_map = { MathFunction: (lambda e: ast.FunCall(e._name, *[_ast(o) for o in e.ufl_operands])), ufl.algebra.Sum: (lambda e: ast.Par(_to_sum(e.ufl_operands))), ufl.algebra.Product: (lambda e: ast.Par(_to_prod(e.ufl_operands))), ufl.algebra.Division: (lambda e: ast.Par(ast.Div(*[_ast(o) for o in e.ufl_operands]))), ufl.algebra.Abs: (lambda e: ast.FunCall("abs", _ast(e.ufl_operands[0]))), Assign: (lambda e: _to_aug_assign(e._ast, e.ufl_operands)), AugmentedAssignment: (lambda e: _to_aug_assign(e._ast, e.ufl_operands)), ufl.constantvalue.ScalarValue: (lambda e: ast.Symbol(e._value)), ufl.constantvalue.Zero: (lambda e: ast.Symbol(0)), ufl.classes.Conditional: (lambda e: ast.Ternary(*[_ast(o) for o in e.ufl_operands])), ufl.classes.EQ: (lambda e: ast.Eq(*[_ast(o) for o in e.ufl_operands])), ufl.classes.NE: (lambda e: ast.NEq(*[_ast(o) for o in e.ufl_operands])), ufl.classes.LT: (lambda e: ast.Less(*[_ast(o) for o in e.ufl_operands])), ufl.classes.LE: (lambda e: ast.LessEq(*[_ast(o) for o in e.ufl_operands])),
def compile_expression(slate_expr, tsfc_parameters=None): """Takes a Slate expression `slate_expr` and returns the appropriate :class:`firedrake.op2.Kernel` object representing the Slate expression. :arg slate_expr: a :class:'TensorBase' expression. :arg tsfc_parameters: an optional `dict` of form compiler parameters to be passed onto TSFC during the compilation of ufl forms. Returns: A `tuple` containing a `SplitKernel(idx, kinfo)` """ if not isinstance(slate_expr, TensorBase): raise ValueError("Expecting a `TensorBase` expression, not %s" % type(slate_expr)) # TODO: Get PyOP2 to write into mixed dats if any(len(a.function_space()) > 1 for a in slate_expr.arguments()): raise NotImplementedError("Compiling mixed slate expressions") # If the expression has already been symbolically compiled, then # simply reuse the produced kernel. if slate_expr._metakernel_cache is not None: return slate_expr._metakernel_cache # Initialize coefficients, shape and statements list expr_coeffs = slate_expr.coefficients() # We treat scalars as 1x1 MatrixBase objects, so we give # the right shape to do so and everything just falls out. # This bit here ensures the return result has the right # shape if slate_expr.rank == 0: shape = (1, ) else: shape = slate_expr.shape statements = [] # Create a builder for the Slate expression builder = KernelBuilder(expression=slate_expr, tsfc_parameters=tsfc_parameters) # Initialize coordinate, cell orientations and facet/layer # symbols coordsym = ast.Symbol("coords") coords = None cell_orientations = ast.Symbol("cell_orientations") cellfacetsym = ast.Symbol("cell_facets") mesh_layer_sym = ast.Symbol("layer") inc = [] # We keep track of temporaries that have been declared declared_temps = {} for cxt_kernel in builder.context_kernels: exp = cxt_kernel.tensor t = builder.temps[exp] if exp not in declared_temps: # Declare and initialize the temporary statements.append(ast.Decl(eigen_matrixbase_type(exp.shape), t)) statements.append(ast.FlatBlock("%s.setZero();\n" % t)) declared_temps[exp] = t it_type = cxt_kernel.original_integral_type if it_type not in supported_integral_types: raise NotImplementedError("Type %s not supported." % it_type) # Explicit checking of coordinates coordinates = exp.ufl_domain().coordinates if coords is not None: assert coordinates == coords else: coords = coordinates if it_type == "cell": # Nothing difficult about cellwise integrals. Just need # to get coefficient info, include_dirs and append # function calls to the appropriate subkernels. # If tensor is mixed, there will be more than one SplitKernel incl = [] for splitkernel in cxt_kernel.tsfc_kernels: index = splitkernel.indices kinfo = splitkernel.kinfo # Generate an iterable of coefficients to pass to the subkernel # if any are required clist = [ c for ci in kinfo.coefficient_map for c in builder.coefficient(exp.coefficients()[ci]) ] if kinfo.oriented: clist.insert(0, cell_orientations) incl.extend(kinfo.kernel._include_dirs) tensor = eigen_tensor(exp, t, index) statements.append( ast.FunCall(kinfo.kernel.name, tensor, coordsym, *clist)) elif it_type in [ "interior_facet", "exterior_facet", "interior_facet_vert", "exterior_facet_vert" ]: # These integral types will require accessing local facet # information and looping over facet indices. 
builder.require_cell_facets() loop_stmt, incl = facet_integral_loop(cxt_kernel, builder, coordsym, cellfacetsym, cell_orientations) statements.append(loop_stmt) elif it_type == "interior_facet_horiz": # The infamous interior horizontal facet # will have two SplitKernels: one top, # one bottom. The mesh layer will determine # which kernels we call. builder.require_mesh_layers() top_sks = [ k for k in cxt_kernel.tsfc_kernels if k.kinfo.integral_type == "exterior_facet_top" ] bottom_sks = [ k for k in cxt_kernel.tsfc_kernels if k.kinfo.integral_type == "exterior_facet_bottom" ] assert len(top_sks) == len(bottom_sks), ( "Number of top and bottom kernels should be equal") # Top and bottom kernels need to be sorted by kinfo.indices # if the space is mixed to ensure indices match. top_sks = sorted(top_sks, key=lambda x: x.indices) bottom_sks = sorted(bottom_sks, key=lambda x: x.indices) stmt, incl = extruded_int_horiz_facet(exp, builder, top_sks, bottom_sks, coordsym, mesh_layer_sym, cell_orientations) statements.append(stmt) elif it_type in ["exterior_facet_bottom", "exterior_facet_top"]: # These kernels will only be called if we are on # the top or bottom layers of the extruded mesh. builder.require_mesh_layers() stmt, incl = extruded_top_bottom_facet(cxt_kernel, builder, coordsym, mesh_layer_sym, cell_orientations) statements.append(stmt) else: raise ValueError("Kernel type not recognized: %s" % it_type) # Don't duplicate include lines inc_dir = list(set(incl) - set(inc)) inc.extend(inc_dir) # Now we handle any terms that require auxiliary temporaries, # such as inverses, transposes and actions of a tensor on a # coefficient if builder.aux_exprs: # The declared temps will be updated within this method aux_statements = auxiliary_temporaries(builder, declared_temps) statements.extend(aux_statements) # Now we create the result statement by declaring its eigen type and # using Eigen::Map to move between Eigen and C data structs. result_sym = ast.Symbol("T%d" % len(builder.temps)) result_data_sym = ast.Symbol("A%d" % len(builder.temps)) result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape) result = ast.Decl(SCALAR_TYPE, ast.Symbol(result_data_sym, shape)) result_statement = ast.FlatBlock( "%s %s((%s *)%s);\n" % (result_type, result_sym, SCALAR_TYPE, result_data_sym)) statements.append(result_statement) # Generate the complete c++ string performing the linear algebra operations # on Eigen matrices/vectors cpp_string = ast.FlatBlock( metaphrase_slate_to_cpp(slate_expr, declared_temps)) statements.append(ast.Incr(result_sym, cpp_string)) # Finalize AST for macro kernel construction builder._finalize_kernels_and_update() # Generate arguments for the macro kernel args = [result, ast.Decl("%s **" % SCALAR_TYPE, coordsym)] # Orientation information if builder.oriented: args.append(ast.Decl("int **", cell_orientations)) # Coefficient information for c in expr_coeffs: if isinstance(c, Constant): ctype = "%s *" % SCALAR_TYPE else: ctype = "%s **" % SCALAR_TYPE args.extend([ast.Decl(ctype, csym) for csym in builder.coefficient(c)]) # Facet information if builder.needs_cell_facets: args.append( ast.Decl("%s *" % as_cstr(cell_to_facets_dtype), cellfacetsym)) # NOTE: We need to be careful about the ordering here. Mesh layers are # added as the final argument to the kernel. 
if builder.needs_mesh_layers: args.append(ast.Decl("int", mesh_layer_sym)) # NOTE: In the future we may want to have more than one "macro_kernel" macro_kernel_name = "compile_slate" stmt = ast.Block(statements) macro_kernel = builder.construct_macro_kernel(name=macro_kernel_name, args=args, statements=stmt) # Tell the builder to construct the final ast kernel_ast = builder.construct_ast([macro_kernel]) # Now we wrap up the kernel ast as a PyOP2 kernel. # Include the Eigen header files inc.extend(["%s/include/eigen3/" % d for d in PETSC_DIR]) op2kernel = op2.Kernel( kernel_ast, macro_kernel_name, cpp=True, include_dirs=inc, headers=['#include <Eigen/Dense>', '#define restrict __restrict']) assert len(slate_expr.ufl_domains()) == 1, ( "No support for multiple domains yet!") # Send back a "TSFC-like" SplitKernel object with an # index and KernelInfo kinfo = KernelInfo(kernel=op2kernel, integral_type=builder.integral_type, oriented=builder.oriented, subdomain_id="otherwise", domain_number=0, coefficient_map=tuple(range(len(expr_coeffs))), needs_cell_facets=builder.needs_cell_facets, pass_layer_arg=builder.needs_mesh_layers) idx = tuple([0] * slate_expr.rank) kernels = (SplitKernel(idx, kinfo), ) # Store the resulting kernel for reuse slate_expr._metakernel_cache = kernels return kernels
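# Hypothetical usage sketch (names illustrative): compiling a Slate mass-matrix
# expression and unpacking the single SplitKernel returned by compile_expression.
#
#     from firedrake import *
#     from firedrake.slate.slac.compiler import compile_expression
#
#     mesh = UnitSquareMesh(4, 4)
#     V = FunctionSpace(mesh, "DG", 1)
#     u, v = TrialFunction(V), TestFunction(V)
#     (idx, kinfo), = compile_expression(Tensor(inner(u, v) * dx))
#     # kinfo.kernel is the generated op2.Kernel; idx == (0, 0) for a rank-2 tensor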
def facet_integral_loop(cxt_kernel, builder, coordsym, cellfacetsym, cell_orientations): """Generates a code statement for evaluating exterior/interior facet integrals. :arg cxt_kernel: A :namedtuple:`ContextKernel` containing all relevant integral types and TSFC kernels associated with the form nested in the expression. :arg builder: A :class:`KernelBuilder` containing the expression context. :arg coordsym: An `ast.Symbol` object representing coordinate arguments for the kernel. :arg cellfacetsym: An `ast.Symbol` representing the cell facets. :arg cell_orientations: An `ast.Symbol` representing cell orientation information. Returns: A COFFEE code statement and updated include_dirs """ exp = cxt_kernel.tensor t = builder.temps[exp] it_type = cxt_kernel.original_integral_type itsym = ast.Symbol("i0") chker = { "interior_facet": 1, "interior_facet_vert": 1, "exterior_facet": 0, "exterior_facet_vert": 0 } # Compute the correct number of facets for a particular facet measure if it_type in ["interior_facet", "exterior_facet"]: # Non-extruded case nfacet = exp.ufl_domain().ufl_cell().num_facets() elif it_type in ["interior_facet_vert", "exterior_facet_vert"]: # Extrusion case base_cell = exp.ufl_domain().ufl_cell()._cells[0] nfacet = base_cell.num_facets() else: raise ValueError("Integral type %s not supported." % it_type) incl = [] funcalls = [] checker = chker[it_type] for splitkernel in cxt_kernel.tsfc_kernels: index = splitkernel.indices kinfo = splitkernel.kinfo # Generate an iterable of coefficients to pass to the subkernel # if any are required clist = [ c for ci in kinfo.coefficient_map for c in builder.coefficient(exp.coefficients()[ci]) ] incl.extend(kinfo.kernel._include_dirs) tensor = eigen_tensor(exp, t, index) if kinfo.oriented: clist.insert(0, cell_orientations) clist.append(ast.FlatBlock("&%s" % itsym)) funcalls.append( ast.FunCall(kinfo.kernel.name, tensor, coordsym, *clist)) loop_body = ast.If( ast.Eq(ast.Symbol(cellfacetsym, rank=(itsym, )), checker), [ast.Block(funcalls, open_scope=True)]) loop_stmt = ast.For(ast.Decl("unsigned int", itsym, init=0), ast.Less(itsym, nfacet), ast.Incr(itsym, 1), loop_body) return loop_stmt, incl
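# Illustrative only: the loop statement assembled above should render to C of roughly
# this shape, where cell_facets[i0] marks each local facet as interior (1) or
# exterior (0) and each subkernel receives &i0 as the local facet index:
#
#     for (unsigned int i0 = 0; i0 < nfacet; i0++) {
#         if (cell_facets[i0] == 1) {          /* 0 for the exterior-facet measures */
#             subkernel(T, coords, ..., &i0);
#         }
#     }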
def compile_expression(slate_expr, tsfc_parameters=None): """Takes a SLATE expression `slate_expr` and returns the appropriate :class:`firedrake.op2.Kernel` object representing the SLATE expression. :arg slate_expr: a :class:'TensorBase' expression. :arg tsfc_parameters: an optional `dict` of form compiler parameters to be passed onto TSFC during the compilation of ufl forms. """ if not isinstance(slate_expr, TensorBase): raise ValueError( "Expecting a `slate.TensorBase` expression, not a %r" % slate_expr) # TODO: Get PyOP2 to write into mixed dats if any(len(a.function_space()) > 1 for a in slate_expr.arguments()): raise NotImplementedError("Compiling mixed slate expressions") # Initialize shape and statements list shape = slate_expr.shape statements = [] # Create a builder for the SLATE expression builder = KernelBuilder(expression=slate_expr, tsfc_parameters=tsfc_parameters) # Initialize coordinate and facet symbols coordsym = ast.Symbol("coords") coords = None cellfacetsym = ast.Symbol("cell_facets") inc = [] # Now we construct the list of statements to provide to the builder context_temps = builder.temps.copy() for exp, t in context_temps.items(): statements.append(ast.Decl(eigen_matrixbase_type(exp.shape), t)) statements.append(ast.FlatBlock("%s.setZero();\n" % t)) for splitkernel in builder.kernel_exprs[exp]: clist = [] index = splitkernel.indices kinfo = splitkernel.kinfo integral_type = kinfo.integral_type if integral_type not in [ "cell", "interior_facet", "exterior_facet" ]: raise NotImplementedError( "Integral type %s not currently supported." % integral_type) coordinates = exp.ufl_domain().coordinates if coords is not None: assert coordinates == coords else: coords = coordinates for cindex in kinfo.coefficient_map: c = exp.coefficients()[cindex] # Handles both mixed and non-mixed coefficient cases clist.extend(builder.extract_coefficient(c)) inc.extend(kinfo.kernel._include_dirs) tensor = eigen_tensor(exp, t, index) if integral_type in ["interior_facet", "exterior_facet"]: builder.require_cell_facets() itsym = ast.Symbol("i0") clist.append(ast.FlatBlock("&%s" % itsym)) loop_body = [] nfacet = exp.ufl_domain().ufl_cell().num_facets() if integral_type == "exterior_facet": checker = 1 else: checker = 0 loop_body.append( ast.If( ast.Eq(ast.Symbol(cellfacetsym, rank=(itsym, )), checker), [ ast.Block([ ast.FunCall(kinfo.kernel.name, tensor, coordsym, *clist) ], open_scope=True) ])) loop = ast.For(ast.Decl("unsigned int", itsym, init=0), ast.Less(itsym, nfacet), ast.Incr(itsym, 1), loop_body) statements.append(loop) else: statements.append( ast.FunCall(kinfo.kernel.name, tensor, coordsym, *clist)) # Now we handle any terms that require auxiliary data (if any) if bool(builder.aux_exprs): aux_temps, aux_statements = auxiliary_information(builder) context_temps.update(aux_temps) statements.extend(aux_statements) result_sym = ast.Symbol("T%d" % len(builder.temps)) result_data_sym = ast.Symbol("A%d" % len(builder.temps)) result_type = "Eigen::Map<%s >" % eigen_matrixbase_type(shape) result = ast.Decl(SCALAR_TYPE, ast.Symbol(result_data_sym, shape)) result_statement = ast.FlatBlock( "%s %s((%s *)%s);\n" % (result_type, result_sym, SCALAR_TYPE, result_data_sym)) statements.append(result_statement) cpp_string = ast.FlatBlock( metaphrase_slate_to_cpp(slate_expr, context_temps)) statements.append(ast.Assign(result_sym, cpp_string)) # Generate arguments for the macro kernel args = [result, ast.Decl("%s **" % SCALAR_TYPE, coordsym)] for c in slate_expr.coefficients(): if isinstance(c, Constant): 
ctype = "%s *" % SCALAR_TYPE else: ctype = "%s **" % SCALAR_TYPE args.extend([ ast.Decl(ctype, sym_c) for sym_c in builder.extract_coefficient(c) ]) if builder.needs_cell_facets: args.append(ast.Decl("char *", cellfacetsym)) macro_kernel_name = "compile_slate" kernel_ast, oriented = builder.construct_ast( name=macro_kernel_name, args=args, statements=ast.Block(statements)) inc.extend(["%s/include/eigen3/" % d for d in PETSC_DIR]) op2kernel = op2.Kernel( kernel_ast, macro_kernel_name, cpp=True, include_dirs=inc, headers=['#include <Eigen/Dense>', '#define restrict __restrict']) assert len(slate_expr.ufl_domains()) == 1 kinfo = KernelInfo(kernel=op2kernel, integral_type="cell", oriented=oriented, subdomain_id="otherwise", domain_number=0, coefficient_map=range(len(slate_expr.coefficients())), needs_cell_facets=builder.needs_cell_facets) idx = tuple([0] * slate_expr.rank) return (SplitKernel(idx, kinfo), )
def _expression_power(expr, parameters): base, exponent = expr.children return coffee.FunCall("pow", expression(base, parameters), expression(exponent, parameters))
def _setup(self): """A setup method to initialize all the local assembly kernels generated by TSFC and creates templated function calls conforming to the Eigen-C++ template library standard. This function also collects any information regarding orientations and extra include directories. """ transformer = Transformer() include_dirs = [] templated_subkernels = [] assembly_calls = OrderedDict([(it, []) for it in self.supported_integral_types]) subdomain_calls = OrderedDict([(sd, []) for sd in self.supported_subdomain_types]) coords = None oriented = False needs_cell_sizes = False # Maps integral type to subdomain key subdomain_map = {"exterior_facet": "subdomains_exterior_facet", "exterior_facet_vert": "subdomains_exterior_facet", "interior_facet": "subdomains_interior_facet", "interior_facet_vert": "subdomains_interior_facet"} for cxt_kernel in self.context_kernels: local_coefficients = cxt_kernel.coefficients it_type = cxt_kernel.original_integral_type exp = cxt_kernel.tensor if it_type not in self.supported_integral_types: raise ValueError("Integral type '%s' not recognized" % it_type) # Explicit checking of coordinates coordinates = cxt_kernel.tensor.ufl_domain().coordinates if coords is not None: assert coordinates == coords, "Mismatching coordinates!" else: coords = coordinates for split_kernel in cxt_kernel.tsfc_kernels: indices = split_kernel.indices kinfo = split_kernel.kinfo kint_type = kinfo.integral_type needs_cell_sizes = needs_cell_sizes or kinfo.needs_cell_sizes args = [c for i in kinfo.coefficient_map for c in self.coefficient(local_coefficients[i])] if kinfo.oriented: args.insert(0, self.cell_orientations_sym) if kint_type in ["interior_facet", "exterior_facet", "interior_facet_vert", "exterior_facet_vert"]: args.append(ast.FlatBlock("&%s" % self.it_sym)) if kinfo.needs_cell_sizes: args.append(self.cell_size_sym) # Assembly calls within the macro kernel tensor = eigen_tensor(exp, self.temps[exp], indices) call = ast.FunCall(kinfo.kernel.name, tensor, self.coord_sym, *args) # Subdomains only implemented for exterior facet integrals if kinfo.subdomain_id != "otherwise": if kint_type not in subdomain_map: msg = "Subdomains for integral type '%s' not implemented" % kint_type raise NotImplementedError(msg) sd_id = kinfo.subdomain_id sd_key = subdomain_map[kint_type] subdomain_calls[sd_key].append((sd_id, call)) else: assembly_calls[it_type].append(call) # Subkernels for local assembly (Eigen templated functions) from coffee.base import Node assert isinstance(kinfo.kernel._code, Node) kast = transformer.visit(kinfo.kernel._code) templated_subkernels.append(kast) include_dirs.extend(kinfo.kernel._include_dirs) oriented = oriented or kinfo.oriented # Add subdomain call to assembly dict assembly_calls.update(subdomain_calls) self.assembly_calls = assembly_calls self.templated_subkernels = templated_subkernels self.include_dirs = list(set(include_dirs)) self.oriented = oriented self.needs_cell_sizes = needs_cell_sizes
def test_funcall_in_arrayinit(): tree = ast.ArrayInit(np.asarray([ast.FunCall("foo"), ast.Symbol("bar")])) assert tree.gencode() == "{foo(), bar}"
def ast(self): return ast.FunCall("pow", _ast(self.ufl_operands[0]), _ast(self.ufl_operands[1]))
def ast(self): return ast.FunCall("log", _ast(self.ufl_operands[0]))
def _setup(self): """A setup method to initialize all the local assembly kernels generated by TSFC and creates templated function calls conforming to the Eigen-C++ template library standard. This function also collects any information regarding orientations and extra include directories. """ transformer = Transformer() include_dirs = [] templated_subkernels = [] assembly_calls = OrderedDict([(it, []) for it in self.supported_integral_types]) coords = None oriented = False for cxt_kernel in self.context_kernels: local_coefficients = cxt_kernel.coefficients it_type = cxt_kernel.original_integral_type exp = cxt_kernel.tensor if it_type not in self.supported_integral_types: raise ValueError("Integral type '%s' not recognized" % it_type) # Explicit checking of coordinates coordinates = cxt_kernel.tensor.ufl_domain().coordinates if coords is not None: assert coordinates == coords, "Mismatching coordinates!" else: coords = coordinates for split_kernel in cxt_kernel.tsfc_kernels: indices = split_kernel.indices kinfo = split_kernel.kinfo # TODO: Implement subdomains for Slate tensors if kinfo.subdomain_id != "otherwise": raise NotImplementedError("Subdomains not implemented.") args = [ c for i in kinfo.coefficient_map for c in self.coefficient(local_coefficients[i]) ] if kinfo.oriented: args.insert(0, self.cell_orientations_sym) if kinfo.integral_type in [ "interior_facet", "exterior_facet", "interior_facet_vert", "exterior_facet_vert" ]: args.append(ast.FlatBlock("&%s" % self.it_sym)) # Assembly calls within the macro kernel tensor = eigen_tensor(exp, self.temps[exp], indices) call = ast.FunCall(kinfo.kernel.name, tensor, self.coord_sym, *args) assembly_calls[it_type].append(call) # Subkernels for local assembly (Eigen templated functions) kast = transformer.visit(kinfo.kernel._ast) templated_subkernels.append(kast) include_dirs.extend(kinfo.kernel._include_dirs) oriented = oriented or kinfo.oriented self.assembly_calls = assembly_calls self.templated_subkernels = templated_subkernels self.include_dirs = list(set(include_dirs)) self.oriented = oriented
def build_hard_fusion_kernel(base_loop, fuse_loop, fusion_map, loop_chain_index): """ Build AST and :class:`Kernel` for two loops suitable to hard fusion. The AST consists of three functions: fusion, base, fuse. base and fuse are respectively the ``base_loop`` and the ``fuse_loop`` kernels, whereas fusion is the orchestrator that invokes, for each ``base_loop`` iteration, base and, if still to be executed, fuse. The orchestrator has the following structure: :: fusion (buffer, ..., executed): base (buffer, ...) for i = 0 to arity: if not executed[i]: additional pointer staging required by kernel2 fuse (sub_buffer, ...) insertion into buffer The executed array tracks whether the i-th iteration (out of /arity/) adjacent to the main kernel1 iteration has been executed. """ finder = Find((ast.FunDecl, ast.PreprocessNode)) base = base_loop.kernel base_ast = dcopy(base._ast) base_info = finder.visit(base_ast) base_headers = base_info[ast.PreprocessNode] base_fundecl = base_info[ast.FunDecl] assert len(base_fundecl) == 1 base_fundecl = base_fundecl[0] fuse = fuse_loop.kernel fuse_ast = dcopy(fuse._ast) fuse_info = finder.visit(fuse_ast) fuse_headers = fuse_info[ast.PreprocessNode] fuse_fundecl = fuse_info[ast.FunDecl] assert len(fuse_fundecl) == 1 fuse_fundecl = fuse_fundecl[0] # Create /fusion/ arguments and signature body = ast.Block([]) fusion_name = '%s_%s' % (base_fundecl.name, fuse_fundecl.name) fusion_args = dcopy(base_fundecl.args + fuse_fundecl.args) fusion_fundecl = ast.FunDecl(base_fundecl.ret, fusion_name, fusion_args, body) # Make sure kernel and variable names are unique base_fundecl.name = "%s_base" % base_fundecl.name fuse_fundecl.name = "%s_fuse" % fuse_fundecl.name for i, decl in enumerate(fusion_args): decl.sym.symbol += '_%d' % i # Filter out duplicate arguments, and append extra arguments to the fundecl binding = WeakFilter().kernel_args([base_loop, fuse_loop], fusion_fundecl) fusion_args += [ast.Decl('int*', 'executed'), ast.Decl('int*', 'fused_iters'), ast.Decl('int', 'i')] # Which args are actually used in /fuse/, but not in /base/ ? 
The gather for # such arguments is moved to /fusion/, to avoid useless memory LOADs base_dats = set(a.data for a in base_loop.args) fuse_dats = set(a.data for a in fuse_loop.args) unshared = OrderedDict() for arg, decl in binding.items(): if arg.data in fuse_dats - base_dats: unshared.setdefault(decl, arg) # Track position of Args that need a postponed gather # Can't track Args themselves as they change across different parloops fargs = {fusion_args.index(i): ('postponed', False) for i in unshared.keys()} fargs.update({len(set(binding.values())): ('onlymap', True)}) # Add maps for arguments that need a postponed gather for decl, arg in unshared.items(): decl_pos = fusion_args.index(decl) fusion_args[decl_pos].sym.symbol = arg.c_arg_name() if arg._is_indirect: fusion_args[decl_pos].sym.rank = () fusion_args.insert(decl_pos + 1, ast.Decl('int*', arg.c_map_name(0, 0))) # Append the invocation of /base/; then, proceed with the invocation # of the /fuse/ kernels base_funcall_syms = [binding[a].sym.symbol for a in base_loop.args] body.children.append(ast.FunCall(base_fundecl.name, *base_funcall_syms)) for idx in range(fusion_map.arity): fused_iter = ast.Assign('i', ast.Symbol('fused_iters', (idx,))) fuse_funcall = ast.FunCall(fuse_fundecl.name) if_cond = ast.Not(ast.Symbol('executed', ('i',))) if_update = ast.Assign(ast.Symbol('executed', ('i',)), 1) if_body = ast.Block([fuse_funcall, if_update], open_scope=True) if_exec = ast.If(if_cond, [if_body]) body.children.extend([ast.FlatBlock('\n'), fused_iter, if_exec]) # Modify the /fuse/ kernel # This is to take into account that many arguments are shared with # /base/, so they will only be staged once for /base/. This requires # tweaking the way the arguments are declared and accessed in /fuse/. # For example, the shared incremented array (called /buffer/ in # the pseudocode in the comment above) now needs to take offsets # to be sure the locations that /base/ is supposed to increment are # actually accessed. The same concept applies to indirect arguments. init = lambda v: '{%s}' % ', '.join([str(j) for j in v]) for i, fuse_loop_arg in enumerate(fuse_loop.args): fuse_kernel_arg = binding[fuse_loop_arg] buffer_name = '%s_vec' % fuse_kernel_arg.sym.symbol fuse_funcall_sym = ast.Symbol(buffer_name) # What kind of temporaries do we need ? if fuse_loop_arg.access == INC: op, lvalue, rvalue = ast.Incr, fuse_kernel_arg.sym.symbol, buffer_name stager = lambda b, l: b.children.extend(l) indexer = lambda indices: [(k, j) for j, k in enumerate(indices)] pointers = [] elif fuse_loop_arg.access == READ: op, lvalue, rvalue = ast.Assign, buffer_name, fuse_kernel_arg.sym.symbol stager = lambda b, l: [b.children.insert(0, j) for j in reversed(l)] indexer = lambda indices: [(j, k) for j, k in enumerate(indices)] pointers = list(fuse_kernel_arg.pointers) # Now gonna handle arguments depending on their type and rank ... if fuse_loop_arg._is_global: # ... Handle global arguments. These can be dropped in the # kernel without any particular fiddling fuse_funcall_sym = ast.Symbol(fuse_kernel_arg.sym.symbol) elif fuse_kernel_arg in unshared: # ...
Handle arguments that appear only in /fuse/ staging = unshared[fuse_kernel_arg].c_vec_init(False).split('\n') rvalues = [ast.FlatBlock(j.split('=')[1]) for j in staging] lvalues = [ast.Symbol(buffer_name, (j,)) for j in range(len(staging))] staging = [ast.Assign(j, k) for j, k in zip(lvalues, rvalues)] # Set up the temporary buffer_symbol = ast.Symbol(buffer_name, (len(staging),)) buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, qualifiers=fuse_kernel_arg.qual, pointers=list(pointers)) # Update the if-then AST body stager(if_exec.children[0], staging) if_exec.children[0].children.insert(0, buffer_decl) elif fuse_loop_arg._is_mat: # ... Handle Mats staging = [] for b in fused_inc_arg._block_shape: for rc in b: lvalue = ast.Symbol(lvalue, (idx, idx), ((rc[0], 'j'), (rc[1], 'k'))) rvalue = ast.Symbol(rvalue, ('j', 'k')) staging = ItSpace(mode=0).to_for([(0, rc[0]), (0, rc[1])], ('j', 'k'), [op(lvalue, rvalue)])[:1] # Set up the temporary buffer_symbol = ast.Symbol(buffer_name, (fuse_kernel_arg.sym.rank,)) buffer_init = ast.ArrayInit(init([init([0.0])])) buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, buffer_init, qualifiers=fuse_kernel_arg.qual, pointers=pointers) # Update the if-then AST body stager(if_exec.children[0], staging) if_exec.children[0].children.insert(0, buffer_decl) elif fuse_loop_arg._is_indirect: cdim = fuse_loop_arg.data.cdim if cdim == 1 and fuse_kernel_arg.sym.rank: # [Special case] # ... Handle rank 1 indirect arguments that appear in both # /base/ and /fuse/: just point into the right location rank = (idx,) if fusion_map.arity > 1 else () fuse_funcall_sym = ast.Symbol(fuse_kernel_arg.sym.symbol, rank) else: # ... Handle indirect arguments. At the C level, these arguments # are of pointer type, so simple pointer arithmetic is used # to ensure the kernel accesses are to the correct locations fuse_arity = fuse_loop_arg.map.arity base_arity = fuse_arity*fusion_map.arity size = fuse_arity*cdim # Set the proper storage layout before invoking /fuse/ ofs_vals = [[base_arity*j + k for k in range(fuse_arity)] for j in range(cdim)] ofs_vals = [[fuse_arity*j + k for k in flatten(ofs_vals)] for j in range(fusion_map.arity)] ofs_vals = list(flatten(ofs_vals)) indices = [ofs_vals[idx*size + j] for j in range(size)] staging = [op(ast.Symbol(lvalue, (j,)), ast.Symbol(rvalue, (k,))) for j, k in indexer(indices)] # Set up the temporary buffer_symbol = ast.Symbol(buffer_name, (size,)) if fuse_loop_arg.access == INC: buffer_init = ast.ArrayInit(init([0.0])) else: buffer_init = ast.EmptyStatement() pointers.pop() buffer_decl = ast.Decl(fuse_kernel_arg.typ, buffer_symbol, buffer_init, qualifiers=fuse_kernel_arg.qual, pointers=pointers) # Update the if-then AST body stager(if_exec.children[0], staging) if_exec.children[0].children.insert(0, buffer_decl) else: # Nothing special to do for direct arguments pass # Finally update the /fuse/ funcall fuse_funcall.children.append(fuse_funcall_sym) fused_headers = set([str(h) for h in base_headers + fuse_headers]) fused_ast = ast.Root([ast.PreprocessNode(h) for h in fused_headers] + [base_fundecl, fuse_fundecl, fusion_fundecl]) return Kernel([base, fuse], fused_ast, loop_chain_index), fargs
def _expression_power(expr, parameters): base, exponent = expr.children complex_mode = int(is_complex(parameters.scalar_type)) return coffee.FunCall(math_table['power'][complex_mode], expression(base, parameters), expression(exponent, parameters))
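# Assumption for illustration: the 'power' row of math_table pairs the real and complex
# C calls, so indexing with complex_mode (0 or 1) picks between them, e.g.
#
#     math_table['power']  # -> ('pow', 'cpow')  (illustrative, not the actual table)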
def dg_injection_kernel(Vf, Vc, ncell): from firedrake import Tensor, AssembledVector, TestFunction, TrialFunction from firedrake.slate.slac import compile_expression macro_builder = MacroKernelBuilder(ScalarType_c, ncell) f = ufl.Coefficient(Vf) macro_builder.set_coefficients([f]) macro_builder.set_coordinates(Vf.mesh()) Vfe = create_element(Vf.ufl_element()) macro_quadrature_rule = make_quadrature( Vfe.cell, estimate_total_polynomial_degree(ufl.inner(f, f))) index_cache = {} parameters = default_parameters() integration_dim, entity_ids = lower_integral_type(Vfe.cell, "cell") macro_cfg = dict(interface=macro_builder, ufl_cell=Vf.ufl_cell(), precision=parameters["precision"], integration_dim=integration_dim, entity_ids=entity_ids, index_cache=index_cache, quadrature_rule=macro_quadrature_rule) fexpr, = fem.compile_ufl(f, **macro_cfg) X = ufl.SpatialCoordinate(Vf.mesh()) C_a, = fem.compile_ufl(X, **macro_cfg) detJ = ufl_utils.preprocess_expression( abs(ufl.JacobianDeterminant(f.ufl_domain()))) macro_detJ, = fem.compile_ufl(detJ, **macro_cfg) Vce = create_element(Vc.ufl_element()) coarse_builder = firedrake_interface.KernelBuilder("cell", "otherwise", 0, ScalarType_c) coarse_builder.set_coordinates(Vc.mesh()) argument_multiindices = (Vce.get_indices(), ) argument_multiindex, = argument_multiindices return_variable, = coarse_builder.set_arguments((ufl.TestFunction(Vc), ), argument_multiindices) integration_dim, entity_ids = lower_integral_type(Vce.cell, "cell") # Midpoint quadrature for jacobian on coarse cell. quadrature_rule = make_quadrature(Vce.cell, 0) coarse_cfg = dict(interface=coarse_builder, ufl_cell=Vc.ufl_cell(), precision=parameters["precision"], integration_dim=integration_dim, entity_ids=entity_ids, index_cache=index_cache, quadrature_rule=quadrature_rule) X = ufl.SpatialCoordinate(Vc.mesh()) K = ufl_utils.preprocess_expression(ufl.JacobianInverse(Vc.mesh())) C_0, = fem.compile_ufl(X, **coarse_cfg) K, = fem.compile_ufl(K, **coarse_cfg) i = gem.Index() j = gem.Index() C_0 = gem.Indexed(C_0, (j, )) C_0 = gem.index_sum(C_0, quadrature_rule.point_set.indices) C_a = gem.Indexed(C_a, (j, )) X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), C_a)) K_ij = gem.Indexed(K, (i, j)) K_ij = gem.index_sum(K_ij, quadrature_rule.point_set.indices) X_a = gem.index_sum(gem.Product(K_ij, X_a), (j, )) C_0, = quadrature_rule.point_set.points C_0 = gem.Indexed(gem.Literal(C_0), (i, )) # fine quad points in coarse reference space. 
X_a = gem.Sum(C_0, gem.Product(gem.Literal(-1), X_a)) X_a = gem.ComponentTensor(X_a, (i, )) # Coarse basis function evaluated at fine quadrature points phi_c = fem.fiat_to_ufl( Vce.point_evaluation(0, X_a, (Vce.cell.get_dimension(), 0)), 0) tensor_indices = tuple(gem.Index(extent=d) for d in f.ufl_shape) phi_c = gem.Indexed(phi_c, argument_multiindex + tensor_indices) fexpr = gem.Indexed(fexpr, tensor_indices) quadrature_weight = macro_quadrature_rule.weight_expression expr = gem.Product(gem.IndexSum(gem.Product(phi_c, fexpr), tensor_indices), gem.Product(macro_detJ, quadrature_weight)) quadrature_indices = macro_builder.indices + macro_quadrature_rule.point_set.indices reps = spectral.Integrals([expr], quadrature_indices, argument_multiindices, parameters) assignments = spectral.flatten([(return_variable, reps)], index_cache) return_variables, expressions = zip(*assignments) expressions = impero_utils.preprocess_gem(expressions, **spectral.finalise_options) assignments = list(zip(return_variables, expressions)) impero_c = impero_utils.compile_gem(assignments, quadrature_indices + argument_multiindex, remove_zeros=True) index_names = [] def name_index(index, name): index_names.append((index, name)) if index in index_cache: for multiindex, suffix in zip(index_cache[index], string.ascii_lowercase): name_multiindex(multiindex, name + suffix) def name_multiindex(multiindex, name): if len(multiindex) == 1: name_index(multiindex[0], name) else: for i, index in enumerate(multiindex): name_index(index, name + str(i)) name_multiindex(quadrature_indices, 'ip') for multiindex, name in zip(argument_multiindices, ['j', 'k']): name_multiindex(multiindex, name) index_names.extend(zip(macro_builder.indices, ["entity"])) body = generate_coffee(impero_c, index_names, parameters["precision"], ScalarType_c) retarg = ast.Decl(ScalarType_c, ast.Symbol("R", rank=(Vce.space_dimension(), ))) local_tensor = coarse_builder.local_tensor local_tensor.init = ast.ArrayInit( numpy.zeros(Vce.space_dimension(), dtype=ScalarType_c)) body.children.insert(0, local_tensor) args = [retarg] + macro_builder.kernel_args + [ macro_builder.coordinates_arg, coarse_builder.coordinates_arg ] # Now we have the kernel that computes <f, phi_c>dx_c # So now we need to hit it with the inverse mass matrix on dx_c u = TrialFunction(Vc) v = TestFunction(Vc) expr = Tensor(ufl.inner(u, v) * ufl.dx).inv * AssembledVector( ufl.Coefficient(Vc)) Ainv, = compile_expression(expr) Ainv = Ainv.kinfo.kernel A = ast.Symbol(local_tensor.sym.symbol) R = ast.Symbol("R") body.children.append( ast.FunCall(Ainv.name, R, coarse_builder.coordinates_arg.sym, A)) from coffee.base import Node assert isinstance(Ainv._code, Node) return op2.Kernel(ast.Node([ Ainv._code, ast.FunDecl("void", "pyop2_kernel_injection_dg", args, body, pred=["static", "inline"]) ]), name="pyop2_kernel_injection_dg", cpp=True, include_dirs=Ainv._include_dirs, headers=Ainv._headers)
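# Hypothetical usage sketch: Vf and Vc are presumably fine and coarse DG function
# spaces (Vf on the refined mesh, with ncell fine cells forming each macro cell); the
# returned op2.Kernel assembles <f, phi_c> over the macro cell and applies the coarse
# inverse mass matrix, i.e. the local L2 injection of a fine-space function into Vc.
#
#     kernel = dg_injection_kernel(Vf, Vc, ncell)   # illustrative call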
def _expression_maxvalue(expr, parameters): return coffee.FunCall('fmax', *[expression(c, parameters) for c in expr.children])
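# A companion handler for MinValue would presumably mirror the MaxValue case above
# with C's fmin; sketch only, not taken from this source:
def _expression_minvalue(expr, parameters):
    # Delegate to the C library's fmin on the translated operands.
    return coffee.FunCall('fmin', *[expression(c, parameters)
                                    for c in expr.children])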