Пример #1
0
def test_krylov_solver_lu():

    mesh = UnitSquareMesh(MPI.comm_world, 12, 12)
    V = FunctionSpace(mesh, ("Lagrange", 1))
    u, v = TrialFunction(V), TestFunction(V)

    a = inner(u, v) * dx
    L = inner(1.0, v) * dx
    A = assemble_matrix(a)
    A.assemble()
    b = assemble_vector(L)
    b.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)

    norm = 13.0

    solver = PETScKrylovSolver(mesh.mpi_comm())
    solver.set_options_prefix("test_lu_")
    PETScOptions.set("test_lu_ksp_type", "preonly")
    PETScOptions.set("test_lu_pc_type", "lu")
    solver.set_from_options()
    x = A.createVecRight()
    solver.set_operator(A)
    solver.solve(x, b)

    # *Tight* tolerance for LU solves
    assert round(x.norm(PETSc.NormType.N2) - norm, 12) == 0
Пример #2
0
    def amg_solve(N, method):
        # Elasticity parameters
        E = 1.0e9
        nu = 0.3
        mu = E / (2.0 * (1.0 + nu))
        lmbda = E * nu / ((1.0 + nu) * (1.0 - 2.0 * nu))

        # Stress computation
        def sigma(v):
            return 2.0 * mu * sym(grad(v)) + lmbda * tr(sym(
                grad(v))) * Identity(2)

        # Define problem
        mesh = UnitSquareMesh(MPI.comm_world, N, N)
        V = VectorFunctionSpace(mesh, 'Lagrange', 1)
        bc0 = Function(V)
        with bc0.vector().localForm() as bc_local:
            bc_local.set(0.0)

        def boundary(x, only_boundary):
            return [only_boundary] * x.shape(0)

        bc = DirichletBC(V.sub(0), bc0, boundary)
        u = TrialFunction(V)
        v = TestFunction(V)

        # Forms
        a, L = inner(sigma(u), grad(v)) * dx, dot(ufl.as_vector(
            (1.0, 1.0)), v) * dx

        # Assemble linear algebra objects
        A = assemble_matrix(a, [bc])
        A.assemble()
        b = assemble_vector(L)
        apply_lifting(b, [a], [[bc]])
        b.ghostUpdate(addv=PETSc.InsertMode.ADD,
                      mode=PETSc.ScatterMode.REVERSE)
        set_bc(b, [bc])

        # Create solution function
        u = Function(V)

        # Create near null space basis and orthonormalize
        null_space = build_nullspace(V, u.vector())

        # Attached near-null space to matrix
        A.set_near_nullspace(null_space)

        # Test that basis is orthonormal
        assert null_space.is_orthonormal()

        # Create PETSC smoothed aggregation AMG preconditioner, and
        # create CG solver
        solver = PETScKrylovSolver("cg", method)

        # Set matrix operator
        solver.set_operator(A)

        # Compute solution and return number of iterations
        return solver.solve(u.vector(), b)
Пример #3
0
    def amg_solve(N, method):
        # Elasticity parameters
        E = 1.0e9
        nu = 0.3
        mu = E / (2.0 * (1.0 + nu))
        lmbda = E * nu / ((1.0 + nu) * (1.0 - 2.0 * nu))

        # Stress computation
        def sigma(v):
            return 2.0 * mu * sym(grad(v)) + lmbda * tr(sym(
                grad(v))) * Identity(2)

        # Define problem
        mesh = UnitSquareMesh(MPI.comm_world, N, N)
        V = VectorFunctionSpace(mesh, 'Lagrange', 1)
        bc0 = Function(V)
        bc = DirichletBC(V.sub(0), bc0, lambda x, on_boundary: on_boundary)
        u = TrialFunction(V)
        v = TestFunction(V)

        # Forms
        a, L = inner(sigma(u), grad(v)) * dx, dot(ufl.as_vector(
            (1.0, 1.0)), v) * dx

        # Assemble linear algebra objects
        A, b = assemble_system(a, L, bc)

        # Create solution function
        u = Function(V)

        # Create near null space basis and orthonormalize
        null_space = build_nullspace(V, u.vector())

        # Attached near-null space to matrix
        A.set_near_nullspace(null_space)

        # Test that basis is orthonormal
        assert null_space.is_orthonormal()

        # Create PETSC smoothed aggregation AMG preconditioner, and
        # create CG solver
        solver = PETScKrylovSolver("cg", method)

        # Set matrix operator
        solver.set_operator(A)

        # Compute solution and return number of iterations
        return solver.solve(u.vector(), b)
Пример #4
0
def test_krylov_solver_lu():

    mesh = UnitSquareMesh(MPI.comm_world, 12, 12)
    V = FunctionSpace(mesh, ("Lagrange", 1))
    u, v = TrialFunction(V), TestFunction(V)

    a = Constant(1.0) * inner(u, v) * dx
    L = inner(Constant(1.0), v) * dx
    A = assemble(a)
    b = assemble(L)

    norm = 13.0

    solver = PETScKrylovSolver(mesh.mpi_comm())
    solver.set_options_prefix("test_lu_")
    PETScOptions.set("test_lu_ksp_type", "preonly")
    PETScOptions.set("test_lu_pc_type", "lu")
    solver.set_from_options()
    x = PETScVector()
    solver.set_operator(A)
    solver.solve(x, b)

    # *Tight* tolerance for LU solves
    assert round(x.norm(cpp.la.Norm.l2) - norm, 12) == 0
Пример #5
0
def xtest_mg_solver_stokes():

    mesh0 = UnitCubeMesh(2, 2, 2)
    mesh1 = UnitCubeMesh(4, 4, 4)
    mesh2 = UnitCubeMesh(8, 8, 8)

    Ve = VectorElement("CG", mesh0.ufl_cell(), 2)
    Qe = FiniteElement("CG", mesh0.ufl_cell(), 1)
    Ze = MixedElement([Ve, Qe])

    Z0 = FunctionSpace(mesh0, Ze)
    Z1 = FunctionSpace(mesh1, Ze)
    Z2 = FunctionSpace(mesh2, Ze)
    W = Z2

    # Boundaries
    def right(x, on_boundary):
        return x[0] > (1.0 - DOLFIN_EPS)

    def left(x, on_boundary):
        return x[0] < DOLFIN_EPS

    def top_bottom(x, on_boundary):
        return x[1] > 1.0 - DOLFIN_EPS or x[1] < DOLFIN_EPS

    # No-slip boundary condition for velocity
    noslip = Constant((0.0, 0.0, 0.0))
    bc0 = DirichletBC(W.sub(0), noslip, top_bottom)

    # Inflow boundary condition for velocity
    inflow = Expression(("-sin(x[1]*pi)", "0.0", "0.0"), degree=2)
    bc1 = DirichletBC(W.sub(0), inflow, right)

    # Collect boundary conditions
    bcs = [bc0, bc1]

    # Define variational problem
    (u, p) = TrialFunctions(W)
    (v, q) = TestFunctions(W)
    f = Constant((0.0, 0.0, 0.0))
    a = inner(grad(u), grad(v)) * dx + div(v) * p * dx + q * div(u) * dx
    L = inner(f, v) * dx

    # Form for use in constructing preconditioner matrix
    b = inner(grad(u), grad(v)) * dx + p * q * dx

    # Assemble system
    A, bb = assemble_system(a, L, bcs)

    # Assemble preconditioner system
    P, btmp = assemble_system(b, L, bcs)

    spaces = [Z0, Z1, Z2]
    dm_collection = PETScDMCollection(spaces)

    solver = PETScKrylovSolver()
    solver.set_operators(A, P)

    PETScOptions.set("ksp_type", "gcr")
    PETScOptions.set("pc_type", "mg")
    PETScOptions.set("pc_mg_levels", 3)
    PETScOptions.set("pc_mg_galerkin")
    PETScOptions.set("ksp_monitor_true_residual")

    PETScOptions.set("ksp_atol", 1.0e-10)
    PETScOptions.set("ksp_rtol", 1.0e-10)
    solver.set_from_options()

    from petsc4py import PETSc

    ksp = solver.ksp()

    ksp.setDM(dm_collection.dm())
    ksp.setDMActive(False)

    x = PETScVector()
    solver.solve(x, bb)

    # Check multigrid solution against LU solver
    solver = LUSolver(A)  # noqa
    x_lu = PETScVector()
    solver.solve(x_lu, bb)
    assert round((x - x_lu).norm("l2"), 10) == 0

    # Clear all PETSc options
    opts = PETSc.Options()
    for key in opts.getAll():
        opts.delValue(key)
Пример #6
0
def test_mg_solver_laplace():

    # Create meshes and function spaces
    meshes = [UnitSquareMesh(N, N) for N in [16, 32, 64]]
    V = [FunctionSpace(mesh, "Lagrange", 1) for mesh in meshes]

    # Create variational problem on fine grid
    u, v = TrialFunction(V[-1]), TestFunction(V[-1])
    a = dot(grad(u), grad(v)) * dx
    L = v * dx
    bc = DirichletBC(V[-1], Constant(0.0), "on_boundary")
    A, b = assemble_system(a, L, bc)

    # Create collection of PETSc DM objects
    dm_collection = PETScDMCollection(V)

    # Create PETSc Krylov solver and set operator
    solver = PETScKrylovSolver()
    solver.set_operator(A)

    # Set PETSc solver type
    PETScOptions.set("ksp_type", "richardson")
    PETScOptions.set("pc_type", "mg")

    # Set PETSc MG type and levels
    PETScOptions.set("pc_mg_levels", len(V))
    PETScOptions.set("pc_mg_galerkin")

    # Set smoother
    PETScOptions.set("mg_levels_ksp_type", "chebyshev")
    PETScOptions.set("mg_levels_pc_type", "jacobi")

    # Set tolerance and monitor residual
    PETScOptions.set("ksp_monitor_true_residual")
    PETScOptions.set("ksp_atol", 1.0e-12)
    PETScOptions.set("ksp_rtol", 1.0e-12)
    solver.set_from_options()

    # Get fine grid DM and attach fine grid DM to solver
    solver.set_dm(dm_collection.get_dm(-1))
    solver.set_dm_active(False)

    # Solve
    x = PETScVector()
    solver.solve(x, b)

    # Check multigrid solution against LU solver solution
    solver = LUSolver(A)  # noqa
    x_lu = PETScVector()
    solver.solve(x_lu, b)
    assert round((x - x_lu).norm("l2"), 10) == 0

    # Clear all PETSc options
    from petsc4py import PETSc
    opts = PETSc.Options()
    for key in opts.getAll():
        opts.delValue(key)
Пример #7
0
PETScOptions.set("ksp_rtol", 1.0e-12)
PETScOptions.set("pc_type", "gamg")

# Use Chebyshev smoothing for multigrid
PETScOptions.set("mg_levels_ksp_type", "chebyshev")
PETScOptions.set("mg_levels_pc_type", "jacobi")

# Improve estimate of eigenvalues for Chebyshev smoothing
PETScOptions.set("mg_levels_esteig_ksp_type", "cg")
PETScOptions.set("mg_levels_ksp_chebyshev_esteig_steps", 20)

# Monitor solver
PETScOptions.set("ksp_monitor")

# Create CG Krylov solver and turn convergence monitoring on
solver = PETScKrylovSolver(MPI.comm_world)
# solver = PETScKrylovSolver()
solver.set_from_options()

# Set matrix operator
solver.set_operator(A)

# Compute solution
solver.solve(u.vector(), b)

# Save solution to XDMF format
file = XDMFFile(MPI.comm_world, "elasticity.xdmf")
file.write(u)

unorm = u.vector().norm(dolfin.cpp.la.Norm.l2)
if MPI.rank(mesh.mpi_comm()) == 0:
Пример #8
0
def solve():
    # Whether to use custom Numba kernels instead of FFC
    useCustomKernels = True

    # Generate a unit cube with (n+1)^3 vertices
    n = 22
    mesh = UnitCubeMesh(MPI.comm_world, n, n, n)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    # Define the boundary: vertices where any component is in machine precision accuracy 0 or 1
    def boundary(x):
        return np.sum(np.logical_or(x < DOLFIN_EPS, x > 1.0 - DOLFIN_EPS),
                      axis=1) > 0

    u0 = Constant(0.0)
    bc = DirichletBC(Q, u0, boundary)

    # Initialize bilinear form and rhs
    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Signature of tabulate_tensor functions
    sig = nb.types.void(nb.types.CPointer(nb.types.double),
                        nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                        nb.types.CPointer(nb.types.double), nb.types.intc)

    # Compile the python functions using Numba
    fnA = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_A)
    fnL = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_L)

    module_name = "_laplace_kernel"

    # Build the kernel
    ffi = cffi.FFI()
    ffi.set_source(module_name, TABULATE_C)
    ffi.cdef(TABULATE_H)
    ffi.compile()

    # Import the compiled kernel
    kernel_mod = importlib.import_module(module_name)
    ffi, lib = kernel_mod.ffi, kernel_mod.lib

    # Get pointer to the compiled function
    fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A"))

    # Get pointers to Numba functions
    #fnA_ptr = fnA.address
    fnL_ptr = fnL.address

    if useCustomKernels:
        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnL_ptr)
    else:
        # Use FFC

        # Bilinear form
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])

        # Rhs
        f = Expression("2.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        # Attach rhs expression as coefficient
        L.set_coefficient(0, f._cpp_object)

    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc])
    A = PETScMatrix()
    b = PETScVector()

    # Perform assembly
    start = time.time()
    assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    end = time.time()

    # We don't care about the RHS
    assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    print(f"Time for assembly: {(end-start)*1000.0}ms")

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # Norms obtained with FFC and n=13
    assert (np.isclose(Anorm, 60.86192203436385))
    assert (np.isclose(bnorm, 0.018075523965828778))

    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)
    solver.set_operator(A)
    solver.solve(u.vector(), b)

    # Export result
    file = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    file.write(u, XDMFFile.Encoding.HDF5)
Пример #9
0
def test_krylov_reuse_pc_lu():
    """Test that LU re-factorisation is only performed after
    set_operator(A) is called"""

    # Test requires PETSc version 3.5 or later. Use petsc4py to check
    # version number.
    try:
        from petsc4py import PETSc
    except ImportError:
        pytest.skip("petsc4py required to check PETSc version")
    else:
        if not PETSc.Sys.getVersion() >= (3, 5, 0):
            pytest.skip("PETSc version must be 3.5  of higher")

    mesh = UnitSquareMesh(MPI.comm_world, 12, 12)
    V = FunctionSpace(mesh, ("Lagrange", 1))
    u, v = TrialFunction(V), TestFunction(V)

    a = Constant(1.0) * u * v * dx
    L = Constant(1.0) * v * dx
    assembler = fem.Assembler(a, L)
    A = assembler.assemble_matrix()
    b = assembler.assemble_vector()
    norm = 13.0

    solver = PETScKrylovSolver(mesh.mpi_comm())
    solver.set_options_prefix("test_lu_")
    PETScOptions.set("test_lu_ksp_type", "preonly")
    PETScOptions.set("test_lu_pc_type", "lu")
    solver.set_from_options()
    solver.set_operator(A)
    x = PETScVector(mesh.mpi_comm())
    solver.solve(x, b)
    assert round(x.norm(cpp.la.Norm.l2) - norm, 10) == 0

    assembler = fem.assemble.Assembler(Constant(0.5) * u * v * dx, L)
    assembler.assemble(A)
    x = PETScVector(mesh.mpi_comm())
    solver.solve(x, b)
    assert round(x.norm(cpp.la.Norm.l2) - 2.0 * norm, 10) == 0

    solver.set_operator(A)
    solver.solve(x, b)
    assert round(x.norm(cpp.la.Norm.l2) - 2.0 * norm, 10) == 0
Пример #10
0
def test_krylov_reuse_pc():
    "Test preconditioner re-use with PETScKrylovSolver"

    # Define problem
    mesh = UnitSquareMesh(MPI.comm_world, 8, 8)
    V = FunctionSpace(mesh, ('Lagrange', 1))
    bc = DirichletBC(V, Constant(0.0), lambda x, on_boundary: on_boundary)
    u = TrialFunction(V)
    v = TestFunction(V)

    # Forms
    a, L = inner(grad(u), grad(v)) * dx, dot(Constant(1.0), v) * dx

    A, P = PETScMatrix(), PETScMatrix()
    b = PETScVector()

    # Assemble linear algebra objects
    assemble(a, tensor=A)  # noqa
    assemble(a, tensor=P)  # noqa
    assemble(L, tensor=b)  # noqa

    # Apply boundary conditions
    bc.apply(A)
    bc.apply(P)
    bc.apply(b)

    # Create Krysolv solver and set operators
    solver = PETScKrylovSolver("gmres", "bjacobi")
    solver.set_operators(A, P)

    # Solve
    x = PETScVector()
    num_iter_ref = solver.solve(x, b)

    # Change preconditioner matrix (bad matrix) and solve (PC will be
    # updated)
    a_p = u * v * dx
    assemble(a_p, tensor=P)  # noqa
    bc.apply(P)
    x = PETScVector()
    num_iter_mod = solver.solve(x, b)
    assert num_iter_mod > num_iter_ref

    # Change preconditioner matrix (good matrix) and solve (PC will be
    # updated)
    a_p = a
    assemble(a_p, tensor=P)  # noqa
    bc.apply(P)
    x = PETScVector()
    num_iter = solver.solve(x, b)
    assert num_iter == num_iter_ref

    # Change preconditioner matrix (bad matrix) and solve (PC will not
    # be updated)
    solver.set_reuse_preconditioner(True)
    a_p = u * v * dx
    assemble(a_p, tensor=P)  # noqa
    bc.apply(P)
    x = PETScVector()
    num_iter = solver.solve(x, b)
    assert num_iter == num_iter_ref

    # Update preconditioner (bad PC, will increase iteration count)
    solver.set_reuse_preconditioner(False)
    x = PETScVector()
    num_iter = solver.solve(x, b)
    assert num_iter == num_iter_mod
def solve(n_runs: int, mesh_size: int, element: FiniteElement,
          reference_tensor: ReferenceTensor, kernel_generator):
    # Whether to use custom kernels instead of FFC
    useCustomKernels = True

    # Generate a unit cube with (n+1)^3 vertices
    mesh = generate_mesh(mesh_size)
    print("Mesh generated.")

    A0 = reference_tensor

    Q = FunctionSpace(mesh, element)
    u = TrialFunction(Q)
    v = TestFunction(Q)

    # Define the boundary: vertices where any component is in machine precision accuracy 0 or 1
    def boundary(x):
        return np.sum(np.logical_or(x < DOLFIN_EPS, x > 1.0 - DOLFIN_EPS),
                      axis=1) > 0

    u0 = Constant(0.0)
    bc = DirichletBC(Q, u0, boundary)

    if useCustomKernels:
        # Initialize bilinear form and rhs
        a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
        L = dolfin.cpp.fem.Form([Q._cpp_object])

        # Signature of tabulate_tensor functions
        sig = nb.types.void(
            nb.types.CPointer(nb.types.double),
            nb.types.CPointer(nb.types.CPointer(nb.types.double)),
            nb.types.CPointer(nb.types.double), nb.types.intc)

        # Compile the python functions using Numba
        fnA = nb.cfunc(sig, cache=True,
                       nopython=True)(numba_kernels.tabulate_tensor_A)
        fnL = nb.cfunc(sig, cache=True,
                       nopython=True)(numba_kernels.tabulate_tensor_L)

        module_name = "_laplace_kernel"
        compile_poisson_kernel(module_name,
                               kernel_generator,
                               A0,
                               verbose=False)
        print("Finished compiling kernel.")

        # Import the compiled kernel
        kernel_mod = importlib.import_module(f"simd.tmp.{module_name}")
        ffi, lib = kernel_mod.ffi, kernel_mod.lib

        # Get pointer to the compiled function
        fnA_ptr = ffi.cast("uintptr_t", ffi.addressof(lib,
                                                      "tabulate_tensor_A"))

        # Get pointers to Numba functions
        # fnA_ptr = fnA.address
        fnL_ptr = fnL.address

        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA_ptr)
        L.set_cell_tabulate(0, fnL_ptr)
    else:
        # Use FFC

        # Bilinear form
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])

        # Rhs
        f = Expression("2.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        # Attach rhs expression as coefficient
        L.set_coefficient(0, f._cpp_object)
        print("Built form.")

    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc])
    A = PETScMatrix()
    b = PETScVector()

    # Callable that performs assembly of matrix
    assembly_callable = lambda: assembler.assemble(
        A, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    # Get timings for assembly of matrix over several runs
    time_avg, time_min, time_max = utils.timing(n_runs,
                                                assembly_callable,
                                                verbose=True)
    print(
        f"Timings for element matrix assembly (n={n_runs}) avg: {round(time_avg*1000, 2)}ms, min: {round(time_min*1000, 2)}ms, max: {round(time_max*1000, 2)}ms"
    )

    # Assemble again to get correct results
    A = PETScMatrix()
    assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # Check norms of assembled system
    if useCustomKernels:
        # Norms obtained with FFC and n=22
        assert (np.isclose(Anorm, 118.19435458024503))
        #assert (np.isclose(bnorm, 0.009396467472097566))

    return

    # Solve the system
    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)
    solver.set_operator(A)
    solver.solve(u.vector(), b)

    # Export result
    file = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    file.write(u, XDMFFile.Encoding.HDF5)
Пример #12
0
b = PETScVector()

print("Running assembly...")
assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)

Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
bnorm = b.norm(dolfin.cpp.la.Norm.l2)
print("A.norm(frobenius)={:.12e}\nb.norm(l2)={:.12e}".format(Anorm, bnorm))

# Norms obtained with n=22 and bcs
assert (np.isclose(Anorm, 60.86192203436385))
assert (np.isclose(bnorm, 0.018075523965828778))

# Norms obtained with n=22 and no bcs
#assert (np.isclose(Anorm, 29.416127208482518))
#assert (np.isclose(bnorm, 0.018726593629987284))

print("Running solver...")
comm = L.mesh().mpi_comm()
solver = PETScKrylovSolver(comm)

u = Function(Q)
solver.set_operator(A)
solver.solve(u.vector(), b)

unorm = u.vector().norm(dolfin.cpp.la.Norm.l2)
print("u.norm(l2)={:.12e}".format(unorm))

assert (np.isclose(unorm, 5.1387821818667))
def assembly():
    # Whether to use custom kernels instead of FFC
    useCustomKernels = True

    # Generate a unit cube with (n+1)^3 vertices
    n = 20
    mesh = UnitCubeMesh(MPI.comm_world, n, n, n)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    def boundary0(x):
        wrong = np.logical_or(x[:, 1] < DOLFIN_EPS, x[:, 1] > 1.0 - DOLFIN_EPS)
        one = np.logical_or(x[:, 0] < DOLFIN_EPS, x[:, 0] > 1.0 - DOLFIN_EPS)
        two = np.logical_or(x[:, 2] < DOLFIN_EPS, x[:, 2] > 1.0 - DOLFIN_EPS)

        return np.logical_and(np.logical_or(one, two), np.logical_not(wrong))

    def boundary1(x):
        return np.logical_or(x[:, 1] < DOLFIN_EPS, x[:, 1] > 1.0 - DOLFIN_EPS)

    u0 = Constant(0.0)
    bc0 = DirichletBC(Q, u0, boundary0)

    u1 = Constant(1.0)
    bc1 = DirichletBC(Q, u1, boundary1)

    # Initialize bilinear form and rhs
    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Signature of tabulate_tensor functions
    sig = nb.types.void(nb.types.CPointer(nb.types.double),
                        nb.types.CPointer(nb.types.CPointer(nb.types.double)),
                        nb.types.CPointer(nb.types.double), nb.types.intc)

    # Compile the numba functions
    fnA = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_A)
    fnb = nb.cfunc(sig, cache=True, nopython=True)(tabulate_tensor_b)

    if useCustomKernels:
        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, fnA.address)
        L.set_cell_tabulate(0, fnb.address)
    else:
        # Use FFC
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        f = Expression("20.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        L.set_coefficient(0, f._cpp_object)

    start = time.time()
    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc0, bc1])
    A = PETScMatrix()
    b = PETScVector()
    assembler.assemble(A, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    assembler.assemble(b, dolfin.cpp.fem.Assembler.BlockType.monolithic)
    end = time.time()

    print(f"Time for assembly: {(end-start)*1000.0}ms")

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)

    print(Anorm, bnorm)

    # Norms obtained with FFC and n=20
    #assert (np.isclose(Anorm, 55.82812911070811))
    #assert (np.isclose(bnorm, 29.73261456296761))

    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)

    solver.set_operator(A)
    solver.solve(u.vector(), b)

    file = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    file.write(u, XDMFFile.Encoding.HDF5)