def ker_ind_inc():
    """Return the AST of the ``ker_ind_inc`` kernel function.

    The function is declared ``void`` and takes two ``int`` arguments
    qualified ``unsigned``: ``B`` (declared with two pointer entries) and
    ``A`` (declared with one).  Its body is a single increment of the
    symbol ``B`` indexed ``(0, 0)`` by the symbol ``A`` indexed ``(0,)``.
    """
    params = [
        ast.Decl('int', 'B', qualifiers=['unsigned'], pointers=['', '']),
        ast.Decl('int', 'A', qualifiers=['unsigned'], pointers=['']),
    ]
    target = ast.Symbol('B', (0, 0))
    amount = ast.Symbol('A', (0, ))
    body = ast.Block([ast.Incr(target, amount)])
    return ast.FunDecl('void', 'ker_ind_inc', params, body)
def ker_write2d():
    """Return the AST of the ``ker_write2d`` kernel function.

    The function is declared ``void`` with one ``int`` argument ``V``
    (qualified ``unsigned``, one pointer entry).  Its body assigns ``1``
    to ``V`` indexed ``(0,)`` and then ``2`` to ``V`` indexed ``(1,)``.
    """
    params = [ast.Decl('int', 'V', qualifiers=['unsigned'], pointers=[''])]
    # (index, value) pairs, in the order the assignments are emitted.
    writes = [
        ast.Assign(ast.Symbol('V', (slot, )), value)
        for slot, value in ((0, 1), (1, 2))
    ]
    return ast.FunDecl('void', 'ker_write2d', params, ast.Block(writes))
def get_restriction_kernel(fiat_element, unique_indices, dim=1,
                           no_weights=False):
    """Build the PyOP2 kernel named ``restriction``.

    The generated function loops over ``i < ncdof``, ``j < nfdof`` and
    ``k < dim`` (outermost to innermost) and performs::

        coarse[k + i*dim] += fine[j][k] * weights[i][j] * count_weights[j][0]

    The ``weights`` factor (and the ``weights`` array declaration) is left
    out when all weights are numerically equal to one; the
    ``count_weights`` factor and argument are left out when ``no_weights``
    is true.

    :arg fiat_element: element passed to ``restriction_weights``.
    :arg unique_indices: index applied to the result of
        ``restriction_weights`` before it is transposed.
    :arg dim: extent of the innermost ``k`` loop and multiplier of ``i``
        in the ``coarse`` index.
    :arg no_weights: if true, the kernel takes no ``count_weights``
        argument and does not scale by it.
    :returns: an ``op2.Kernel`` wrapping the generated function, created
        with ``parameters["coffee"]`` as its ``opts``.
    """
    weights = restriction_weights(fiat_element)[unique_indices].T
    ncdof = weights.shape[0]
    nfdof = weights.shape[1]

    def counted_loop(index, extent, statement):
        # for (int index = 0; index < extent; index += 1) { statement }
        return ast.For(ast.Decl("int", index, ast.c_sym(0)),
                       ast.Less(index, ast.c_sym(extent)),
                       ast.Incr(index, ast.c_sym(1)),
                       ast.Block([statement], open_scope=True))

    params = [
        ast.Decl("double", ast.Symbol("coarse", (ncdof*dim, ))),
        ast.Decl("double *restrict *restrict ", ast.Symbol("fine", ()),
                 qualifiers=["const"]),
    ]
    if not no_weights:
        params.append(ast.Decl("double *restrict *restrict",
                               ast.Symbol("count_weights", ()),
                               qualifiers=["const"]))

    # The weights table is only emitted when it is not trivially all ones.
    unit_weights = np.allclose(weights, 1.0)
    preamble = []
    if not unit_weights:
        preamble.append(
            ast.Decl("double", ast.Symbol("weights", (ncdof, nfdof)),
                     ast.ArrayInit(format_array_literal(weights)),
                     qualifiers=["const"]))

    i = ast.Symbol("i", ())
    j = ast.Symbol("j", ())
    k = ast.Symbol("k", ())
    fine = ast.Symbol("fine", (j, k))

    # Right-hand side: fine value, optionally scaled by either weight.
    if no_weights and unit_weights:
        rhs = fine
    elif no_weights:
        rhs = ast.Prod(fine, ast.Symbol("weights", (i, j)))
    elif unit_weights:
        rhs = ast.Prod(fine, ast.Symbol("count_weights", (j, 0)))
    else:
        rhs = ast.Prod(fine, ast.Prod(ast.Symbol("weights", (i, j)),
                                      ast.Symbol("count_weights", (j, 0))))

    coarse_entry = ast.Symbol("coarse",
                              (ast.Sum(k, ast.Prod(i, ast.c_sym(dim))),))
    nest = ast.Incr(coarse_entry, rhs)
    # Wrap from the innermost loop outwards: k, then j, then i.
    for index, extent in ((k, dim), (j, nfdof), (i, ncdof)):
        nest = counted_loop(index, extent, nest)

    kernel_ast = ast.FunDecl("void", "restriction", params,
                             ast.Block(preamble + [nest]),
                             pred=["static", "inline"])
    return op2.Kernel(kernel_ast, "restriction", opts=parameters["coffee"])
def get_count_kernel(arity):
    """Build the PyOP2 kernel named ``count_weights``.

    The generated function takes a ``double`` array ``weight`` of extent
    ``arity`` and adds ``1.0`` to each of its entries.

    :arg arity: extent of the ``weight`` argument and of the loop.
    :returns: an ``op2.Kernel`` wrapping the generated function, created
        with ``parameters["coffee"]`` as its ``opts``.
    """
    params = [ast.Decl("double", ast.Symbol("weight", (arity, )))]
    idx = ast.Symbol("i", ())

    bump = ast.Incr(ast.Symbol("weight", (idx, )), ast.c_sym(1.0))
    init = ast.Decl("int", idx, ast.c_sym(0))
    cond = ast.Less(idx, ast.c_sym(arity))
    step = ast.Incr(idx, ast.c_sym(1))
    sweep = ast.For(init, cond, step, ast.Block([bump], open_scope=True))

    kernel_ast = ast.FunDecl("void", "count_weights", params,
                             ast.Block([sweep]),
                             pred=["static", "inline"])
    return op2.Kernel(kernel_ast, "count_weights",
                      opts=parameters["coffee"])
def construct_ast(self, name, args, statements):
    """Constructs the full kernel AST of a given SLATE expression.

    The :class:`Transformer` is used to perform the conversion from
    standard C into the Eigen C++ template library syntax.

    :arg name: a string denoting the name of the macro kernel.
    :arg args: a list of arguments for the macro_kernel.
    :arg statements: a `coffee.base.Block` of instructions, which contains
                     declarations of temporaries, function calls to all
                     subkernels and any auxiliary information needed to
                     evaluate the SLATE expression.
                     E.g. facet integral loops and action loops.

    Returns: a pair of the full kernel AST to be converted into a PyOP2
             kernel (the transformed subkernel ASTs followed by the macro
             kernel) and the orientation information.
    """
    # The macro kernel body has to arrive already wrapped in a Block.
    assert isinstance(statements, ast.Block)

    macro_kernel = ast.FunDecl("void", name, args, statements,
                               pred=["static", "inline"])
    transformer = Transformer()
    subkernels = []
    oriented = False

    # self.kernel_exprs is assumed to have been populated by now; walk
    # every kernel of every expression in iteration order.
    every_kernel = (ks
                    for group in self.kernel_exprs.values()
                    for ks in group)
    for ks in every_kernel:
        info = ks.kinfo
        # Latch onto the first truthy orientation flag encountered.
        if not oriented:
            oriented = info.oriented

        # TODO: Extend multiple domains support
        assert info.subdomain_id == "otherwise"

        subkernels.append(transformer.visit(info.kernel._ast))

    # The macro kernel goes last, after all the subkernels it calls.
    return ast.Node(subkernels + [macro_kernel]), oriented
def ast_matmul(self, F_a, implementation='optimized'):
    """Generate an AST for a PyOP2 kernel performing a matrix-vector
    multiplication.

    The generated function is ``static inline void`` and takes ``A``,
    ``B`` and ``C``.  For each ``i`` in ``[0, ndofs*cdim)`` it stores in
    ``C[i/cdim][i%cdim]`` the sum over ``j < ndofs`` and ``k < cdim`` of
    the ``A`` entry times ``B[j][k]``.  ``A`` is addressed with rank
    ``('i',)`` and the offset pair ``(ndofs*cdim, 'j*cdim + k')``
    (presumably the flattened entry ``A[i*ndofs*cdim + j*cdim + k]`` --
    confirm against COFFEE's ``Symbol``).

    Two code shapes are produced:

    * ``implementation == 'optimized'`` and ``cdim >= 4``: the products
      are accumulated in a local ``sum`` that is written to ``C`` once
      per row; the ``k`` loop carries a SIMD reduction pragma when
      ``cdim`` is a non-zero multiple of ``isa['dp_reg']``.
    * otherwise: the ``C`` entry is zeroed and incremented in place.

    Results are memoised in ``self.asts`` under the key
    ``(ndofs, cdim, name, implementation)``.

    :arg F_a: object whose ``function_space()`` supplies the element,
        the ``dim`` and the ``name`` used to size and name the kernel.
    :arg implementation: ``'optimized'`` selects the accumulator variant
        when ``cdim >= 4``; any other value selects the in-place one.
    :returns: the (possibly cached) ``ast.FunDecl`` of the kernel.
    """
    # The number of dofs on each element is /ndofs*cdim/
    F_a_fs = F_a.function_space()
    ndofs = sum(self.mesh.make_dofs_per_plex_entity(
        F_a_fs.fiat_element.entity_dofs()))
    cdim = F_a_fs.dim
    name = 'mat_vec_mul_kernel_%s' % F_a_fs.name

    identifier = (ndofs, cdim, name, implementation)
    if identifier in self.asts:
        return self.asts[identifier]

    from coffee import isa

    if cdim and cdim % isa['dp_reg'] == 0:
        simd_pragma = '#pragma simd reduction(+:sum)'
    else:
        simd_pragma = ''

    nrows = ndofs * cdim

    # Fresh nodes are built on every call so that no AST node is shared
    # between two places of the tree.
    def a_times_b():
        return ast.Prod(
            ast.Symbol('A', ('i', ), ((nrows, 'j*%d + k' % cdim), )),
            ast.Symbol('B', ('j', 'k')))

    def c_entry():
        return ast.Symbol('C', ('i/%d' % cdim, 'index'))

    def index_decl():
        # const int index = i % cdim;
        return ast.Decl('const int', ast.Symbol('index'),
                        init=ast.Symbol('i%%%d' % cdim))

    # Craft the AST
    if implementation == 'optimized' and cdim >= 4:
        k_loop = ast.c_for('k', cdim,
                           ast.Incr(ast.Symbol('sum'), a_times_b()),
                           simd_pragma).children[0]
        row_body = [
            index_decl(),
            ast.Decl('double', ast.Symbol('sum'), init=ast.Symbol('0.0')),
            ast.c_for('j', ndofs, k_loop).children[0],
            ast.Assign(c_entry(), 'sum'),
        ]
    else:
        # NOTE(review): with cdim == 0 the generated C would contain
        # ``i%0`` / ``i/0``; callers presumably never pass that -- confirm.
        k_loop = ast.c_for('k', cdim,
                           ast.Incr(c_entry(), a_times_b())).children[0]
        row_body = [
            index_decl(),
            # The second index is the plain name 'index'; it takes no
            # format argument (applying ``% cdim`` to it raises TypeError).
            ast.Assign(c_entry(), '0.0'),
            ast.c_for('j', ndofs, k_loop).children[0],
        ]

    body = ast.Block([ast.c_for('i', nrows, row_body).children[0]])
    funargs = [
        ast.Decl('double* restrict', 'A'),
        ast.Decl('double *restrict *restrict', 'B'),
        ast.Decl('double *restrict *', 'C'),
    ]
    fundecl = ast.FunDecl('void', name, funargs, body, ['static', 'inline'])

    # Track the AST for later fast retrieval
    self.asts[identifier] = fundecl
    return fundecl
def exterior_facet_boundary_node_map(self, method):
    '''The :class:`pyop2.Map` from exterior facets to the nodes on
    those facets. Note that this differs from
    :meth:`exterior_facet_node_map` in that only surface nodes
    are referenced, not all nodes in cells touching the surface.

    :arg method: The method for determining boundary nodes, either
        ``"topological"`` (dofs in the closure of the facet entity) or
        ``"geometric"`` (the element's facet support dofs). See
        :class:`~.bcs.DirichletBC`.
    :returns: an :class:`pyop2.Map` named ``exterior_facet_boundary_node``
        from a set sized like the exterior facet set to ``self.node_set``.
    :raises ValueError: if ``method`` is not one of the two values above.
    '''
    # Reject unknown methods up front; otherwise boundary_dofs would be
    # left unbound and fail later with an unrelated-looking error.
    if method not in ("topological", "geometric"):
        raise ValueError("Unknown boundary node method %r" % (method,))

    el = self.fiat_element
    dim = self._mesh.facet_dimension()

    if method == "topological":
        boundary_dofs = el.entity_closure_dofs()[dim]
    else:
        boundary_dofs = el.facet_support_dofs()

    nodes_per_facet = len(boundary_dofs[0])

    # HACK ALERT
    # The facet set does not have a halo associated with it, since
    # we only construct halos for DoF sets.  Fortunately, this
    # loop is direct and we already have all the correct
    # information available locally.  So We fake a set of the
    # correct size and carry out a direct loop
    facet_set = op2.Set(self._mesh.exterior_facets.set.total_size)

    fs_dat = op2.Dat(
        facet_set**el.space_dimension(),
        data=self.exterior_facet_node_map().values_with_halo)

    facet_dat = op2.Dat(facet_set**nodes_per_facet, dtype=np.int32)

    # One row of dofs per facet.  The kernel indexes the rows with the
    # local facet number, so emit them in sorted key order rather than
    # relying on dict iteration order (keys are presumably the local
    # facet numbers -- confirm against the element).
    local_facet_nodes = np.array(
        [boundary_dofs[facet] for facet in sorted(boundary_dofs)])

    def c_array(xs):
        # Render an iterable as a C array literal, e.g. "{0, 1, 2}".
        return "{" + ", ".join(map(str, xs)) + "}"

    n_local_facets = len(el.get_reference_element().topology[dim])
    body = ast.Block([
        # const int l_nodes[n_local_facets][nodes_per_facet] = {...};
        ast.Decl("int",
                 ast.Symbol("l_nodes", (n_local_facets, nodes_per_facet)),
                 init=ast.ArrayInit(
                     c_array(map(c_array, local_facet_nodes))),
                 qualifiers=["const"]),
        # Copy the facet's nodes out of the cell's node list.
        ast.For(
            ast.Decl("int", "n", 0),
            ast.Less("n", nodes_per_facet),
            ast.Incr("n", 1),
            ast.Assign(
                ast.Symbol("facet_nodes", ("n", )),
                ast.Symbol("cell_nodes", ("l_nodes[facet[0]][n]", ))))
    ])

    kernel = op2.Kernel(
        ast.FunDecl("void", "create_bc_node_map", [
            ast.Decl("int*", "cell_nodes"),
            ast.Decl("int*", "facet_nodes"),
            ast.Decl("unsigned int*", "facet")
        ], body),
        "create_bc_node_map")

    local_facet_dat = op2.Dat(
        facet_set**self._mesh.exterior_facets._rank,
        self._mesh.exterior_facets.local_facet_dat.data_ro_with_halos,
        dtype=np.uintc)
    op2.par_loop(kernel, facet_set,
                 fs_dat(op2.READ),
                 facet_dat(op2.WRITE),
                 local_facet_dat(op2.READ))

    if isinstance(self._mesh, mesh_t.ExtrudedMesh):
        offset = self.offset[boundary_dofs[0]]
    else:
        offset = None

    return op2.Map(facet_set, self.node_set,
                   nodes_per_facet,
                   facet_dat.data_ro_with_halos,
                   name="exterior_facet_boundary_node",
                   offset=offset)