Пример #1
0
def ker_ind_inc():
    return ast.FunDecl(
        'void', 'ker_ind_inc', [
            ast.Decl('int', 'B', qualifiers=['unsigned'], pointers=['', '']),
            ast.Decl('int', 'A', qualifiers=['unsigned'], pointers=[''])
        ],
        ast.Block([ast.Incr(ast.Symbol('B', (0, 0)), ast.Symbol('A', (0, )))]))
Пример #2
0
def ker_write2d():
    return ast.FunDecl(
        'void', 'ker_write2d',
        [ast.Decl('int', 'V', qualifiers=['unsigned'], pointers=[''])],
        ast.Block([
            ast.Assign(ast.Symbol('V', (0, )), 1),
            ast.Assign(ast.Symbol('V', (1, )), 2)
        ]))
Пример #3
0
def get_restriction_kernel(fiat_element, unique_indices, dim=1, no_weights=False):
    weights = restriction_weights(fiat_element)[unique_indices].T
    ncdof = weights.shape[0]
    nfdof = weights.shape[1]
    arglist = [ast.Decl("double", ast.Symbol("coarse", (ncdof*dim, ))),
               ast.Decl("double *restrict *restrict ", ast.Symbol("fine", ()),
                        qualifiers=["const"])]
    if not no_weights:
        arglist.append(ast.Decl("double *restrict *restrict", ast.Symbol("count_weights", ()),
                                qualifiers=["const"]))

    all_ones = np.allclose(weights, 1.0)

    if all_ones:
        w = []
    else:
        w_sym = ast.Symbol("weights", (ncdof, nfdof))
        init = ast.ArrayInit(format_array_literal(weights))
        w = [ast.Decl("double", w_sym, init,
                      qualifiers=["const"])]

    i = ast.Symbol("i", ())
    j = ast.Symbol("j", ())
    k = ast.Symbol("k", ())
    fine = ast.Symbol("fine", (j, k))
    if no_weights:
        if all_ones:
            assign = fine
        else:
            assign = ast.Prod(fine, ast.Symbol("weights", (i, j)))
    else:
        if all_ones:
            assign = ast.Prod(fine, ast.Symbol("count_weights", (j, 0)))
        else:
            assign = ast.Prod(fine,
                              ast.Prod(ast.Symbol("weights", (i, j)),
                                       ast.Symbol("count_weights", (j, 0))))
    assignment = ast.Incr(ast.Symbol("coarse", (ast.Sum(k, ast.Prod(i, ast.c_sym(dim))),)),
                          assign)
    k_loop = ast.For(ast.Decl("int", k, ast.c_sym(0)),
                     ast.Less(k, ast.c_sym(dim)),
                     ast.Incr(k, ast.c_sym(1)),
                     ast.Block([assignment], open_scope=True))
    j_loop = ast.For(ast.Decl("int", j, ast.c_sym(0)),
                     ast.Less(j, ast.c_sym(nfdof)),
                     ast.Incr(j, ast.c_sym(1)),
                     ast.Block([k_loop], open_scope=True))
    i_loop = ast.For(ast.Decl("int", i, ast.c_sym(0)),
                     ast.Less(i, ast.c_sym(ncdof)),
                     ast.Incr(i, ast.c_sym(1)),
                     ast.Block([j_loop], open_scope=True))
    k = ast.FunDecl("void", "restriction", arglist, ast.Block(w + [i_loop]),
                    pred=["static", "inline"])

    return op2.Kernel(k, "restriction", opts=parameters["coffee"])
Пример #4
0
def get_count_kernel(arity):
    arglist = [ast.Decl("double", ast.Symbol("weight", (arity, )))]
    i = ast.Symbol("i", ())
    assignment = ast.Incr(ast.Symbol("weight", (i, )), ast.c_sym(1.0))
    loop = ast.For(ast.Decl("int", i, ast.c_sym(0)),
                   ast.Less(i, ast.c_sym(arity)),
                   ast.Incr(i, ast.c_sym(1)),
                   ast.Block([assignment], open_scope=True))
    k = ast.FunDecl("void", "count_weights", arglist,
                    ast.Block([loop]),
                    pred=["static", "inline"])
    return op2.Kernel(k, "count_weights", opts=parameters["coffee"])
Пример #5
0
    def construct_ast(self, name, args, statements):
        """Constructs the full kernel AST of a given SLATE expression. The :class:`Transformer`
        is used to perform the conversion from standard C into the Eigen C++ template library
        syntax.

        :arg name: a string denoting the name of the macro kernel.
        :arg args: a list of arguments for the macro_kernel.
        :arg statements: a `coffee.base.Block` of instructions, which contains declarations
                         of temporaries, function calls to all subkernels and any auxilliary
                         information needed to evaulate the SLATE expression.
                         E.g. facet integral loops and action loops.

        Returns: the full kernel AST to be converted into a PyOP2 kernel, as well as any
                 orientation information.
        """
        # all kernel body statements must be wrapped up as a coffee.base.Block
        assert isinstance(statements, ast.Block)

        macro_kernel = ast.FunDecl("void",
                                   name,
                                   args,
                                   statements,
                                   pred=["static", "inline"])

        kernel_list = []
        transformer = Transformer()
        oriented = False
        # Assume self.kernel_exprs is populated at this point
        for kernel_items in self.kernel_exprs.values():
            for ks in kernel_items:
                oriented = oriented or ks.kinfo.oriented
                # TODO: Extend multiple domains support
                assert ks.kinfo.subdomain_id == "otherwise"
                kast = transformer.visit(ks.kinfo.kernel._ast)
                kernel_list.append(kast)
        kernel_list.append(macro_kernel)

        return ast.Node(kernel_list), oriented
Пример #6
0
    def ast_matmul(self, F_a, implementation='optimized'):
        """Generate an AST for a PyOP2 kernel performing a matrix-vector multiplication."""

        # The number of dofs on each element is /ndofs*cdim/
        F_a_fs = F_a.function_space()
        ndofs = F_a_fs.fiat_element.entity_dofs()
        ndofs = sum(self.mesh.make_dofs_per_plex_entity(ndofs))
        cdim = F_a_fs.dim
        name = 'mat_vec_mul_kernel_%s' % F_a_fs.name

        identifier = (ndofs, cdim, name, implementation)
        if identifier in self.asts:
            return self.asts[identifier]

        from coffee import isa, options
        if cdim and cdim % isa['dp_reg'] == 0:
            simd_pragma = '#pragma simd reduction(+:sum)'
        else:
            simd_pragma = ''

        # Craft the AST
        if implementation == 'optimized' and cdim >= 4:
            body = ast.Incr(
                ast.Symbol('sum'),
                ast.Prod(
                    ast.Symbol('A', ('i', ),
                               ((ndofs * cdim, 'j*%d + k' % cdim), )),
                    ast.Symbol('B', ('j', 'k'))))
            body = ast.c_for('k', cdim, body, simd_pragma).children[0]
            body = [
                ast.Decl('const int',
                         ast.Symbol('index'),
                         init=ast.Symbol('i%%%d' % cdim)),
                ast.Decl('double', ast.Symbol('sum'), init=ast.Symbol('0.0')),
                ast.c_for('j', ndofs, body).children[0],
                ast.Assign(ast.Symbol('C', ('i/%d' % cdim, 'index')), 'sum')
            ]
            body = ast.Block([ast.c_for('i', ndofs * cdim, body).children[0]])
            funargs = [
                ast.Decl('double* restrict', 'A'),
                ast.Decl('double *restrict *restrict', 'B'),
                ast.Decl('double *restrict *', 'C')
            ]
            fundecl = ast.FunDecl('void', name, funargs, body,
                                  ['static', 'inline'])
        else:
            body = ast.Incr(
                ast.Symbol('C', ('i/%d' % cdim, 'index')),
                ast.Prod(
                    ast.Symbol('A', ('i', ),
                               ((ndofs * cdim, 'j*%d + k' % cdim), )),
                    ast.Symbol('B', ('j', 'k'))))
            body = ast.c_for('k', cdim, body).children[0]
            body = [
                ast.Decl('const int',
                         ast.Symbol('index'),
                         init=ast.Symbol('i%%%d' % cdim)),
                ast.Assign(ast.Symbol('C', ('i/%d' % cdim, 'index' % cdim)),
                           '0.0'),
                ast.c_for('j', ndofs, body).children[0]
            ]
            body = ast.Block([ast.c_for('i', ndofs * cdim, body).children[0]])
            funargs = [
                ast.Decl('double* restrict', 'A'),
                ast.Decl('double *restrict *restrict', 'B'),
                ast.Decl('double *restrict *', 'C')
            ]
            fundecl = ast.FunDecl('void', name, funargs, body,
                                  ['static', 'inline'])

        # Track the AST for later fast retrieval
        self.asts[identifier] = fundecl

        return fundecl
Пример #7
0
    def exterior_facet_boundary_node_map(self, method):
        '''The :class:`pyop2.Map` from exterior facets to the nodes on
        those facets. Note that this differs from
        :meth:`exterior_facet_node_map` in that only surface nodes
        are referenced, not all nodes in cells touching the surface.

        :arg method: The method for determining boundary nodes. See
            :class:`~.bcs.DirichletBC`.
        '''

        el = self.fiat_element

        dim = self._mesh.facet_dimension()

        if method == "topological":
            boundary_dofs = el.entity_closure_dofs()[dim]
        elif method == "geometric":
            boundary_dofs = el.facet_support_dofs()

        nodes_per_facet = \
            len(boundary_dofs[0])

        # HACK ALERT
        # The facet set does not have a halo associated with it, since
        # we only construct halos for DoF sets.  Fortunately, this
        # loop is direct and we already have all the correct
        # information available locally.  So We fake a set of the
        # correct size and carry out a direct loop
        facet_set = op2.Set(self._mesh.exterior_facets.set.total_size)

        fs_dat = op2.Dat(facet_set**el.space_dimension(),
                         data=self.exterior_facet_node_map().values_with_halo)

        facet_dat = op2.Dat(facet_set**nodes_per_facet, dtype=np.int32)

        local_facet_nodes = np.array(
            [dofs for e, dofs in boundary_dofs.iteritems()])

        # Helper function to turn the inner index of an array into c
        # array literals.
        c_array = lambda xs: "{" + ", ".join(map(str, xs)) + "}"

        body = ast.Block([
            ast.Decl("int",
                     ast.Symbol("l_nodes",
                                (len(el.get_reference_element().topology[dim]),
                                 nodes_per_facet)),
                     init=ast.ArrayInit(
                         c_array(map(c_array, local_facet_nodes))),
                     qualifiers=["const"]),
            ast.For(
                ast.Decl("int", "n", 0), ast.Less("n", nodes_per_facet),
                ast.Incr("n", 1),
                ast.Assign(
                    ast.Symbol("facet_nodes", ("n", )),
                    ast.Symbol("cell_nodes", ("l_nodes[facet[0]][n]", ))))
        ])

        kernel = op2.Kernel(
            ast.FunDecl("void", "create_bc_node_map", [
                ast.Decl("int*", "cell_nodes"),
                ast.Decl("int*", "facet_nodes"),
                ast.Decl("unsigned int*", "facet")
            ], body), "create_bc_node_map")

        local_facet_dat = op2.Dat(
            facet_set**self._mesh.exterior_facets._rank,
            self._mesh.exterior_facets.local_facet_dat.data_ro_with_halos,
            dtype=np.uintc)
        op2.par_loop(kernel, facet_set, fs_dat(op2.READ), facet_dat(op2.WRITE),
                     local_facet_dat(op2.READ))

        if isinstance(self._mesh, mesh_t.ExtrudedMesh):
            offset = self.offset[boundary_dofs[0]]
        else:
            offset = None
        return op2.Map(facet_set,
                       self.node_set,
                       nodes_per_facet,
                       facet_dat.data_ro_with_halos,
                       name="exterior_facet_boundary_node",
                       offset=offset)