def test_cffi_assembly():
    mesh = UnitSquareMesh(MPI.COMM_WORLD, 13, 13)
    V = FunctionSpace(mesh, ("Lagrange", 1))

    if mesh.mpi_comm().rank == 0:
        from cffi import FFI
        ffibuilder = FFI()
        ffibuilder.set_source(
            "_cffi_kernelA", r"""
        #include <math.h>
        #include <stdalign.h>
        void tabulate_tensor_poissonA(double* restrict A, const double* w,
                                    const double* c,
                                    const double* restrict coordinate_dofs,
                                    const int* entity_local_index,
                                    const int* cell_orientation)
        {
        // Precomputed values of basis functions and precomputations
        // FE* dimensions: [entities][points][dofs]
        // PI* dimensions: [entities][dofs][dofs] or [entities][dofs]
        // PM* dimensions: [entities][dofs][dofs]
        alignas(32) static const double FE3_C0_D01_Q1[1][1][2] = { { { -1.0, 1.0 } } };
        // Unstructured piecewise computations
        const double J_c0 = coordinate_dofs[0] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[2] * FE3_C0_D01_Q1[0][0][1];
        const double J_c3 = coordinate_dofs[1] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[5] * FE3_C0_D01_Q1[0][0][1];
        const double J_c1 = coordinate_dofs[0] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[4] * FE3_C0_D01_Q1[0][0][1];
        const double J_c2 = coordinate_dofs[1] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[3] * FE3_C0_D01_Q1[0][0][1];
        alignas(32) double sp[20];
        sp[0] = J_c0 * J_c3;
        sp[1] = J_c1 * J_c2;
        sp[2] = sp[0] + -1 * sp[1];
        sp[3] = J_c0 / sp[2];
        sp[4] = -1 * J_c1 / sp[2];
        sp[5] = sp[3] * sp[3];
        sp[6] = sp[3] * sp[4];
        sp[7] = sp[4] * sp[4];
        sp[8] = J_c3 / sp[2];
        sp[9] = -1 * J_c2 / sp[2];
        sp[10] = sp[9] * sp[9];
        sp[11] = sp[8] * sp[9];
        sp[12] = sp[8] * sp[8];
        sp[13] = sp[5] + sp[10];
        sp[14] = sp[6] + sp[11];
        sp[15] = sp[12] + sp[7];
        sp[16] = fabs(sp[2]);
        sp[17] = sp[13] * sp[16];
        sp[18] = sp[14] * sp[16];
        sp[19] = sp[15] * sp[16];
        // UFLACS block mode: preintegrated
        A[0] = 0.5 * sp[19] + 0.5 * sp[18] + 0.5 * sp[18] + 0.5 * sp[17];
        A[1] = -0.5 * sp[19] + -0.5 * sp[18];
        A[2] = -0.5 * sp[18] + -0.5 * sp[17];
        A[3] = -0.5 * sp[19] + -0.5 * sp[18];
        A[4] = 0.5 * sp[19];
        A[5] = 0.5 * sp[18];
        A[6] = -0.5 * sp[18] + -0.5 * sp[17];
        A[7] = 0.5 * sp[18];
        A[8] = 0.5 * sp[17];
        }

        void tabulate_tensor_poissonL(double* restrict A, const double* w,
                                     const double* c,
                                     const double* restrict coordinate_dofs,
                                     const int* entity_local_index,
                                     const int* cell_orientation)
        {
        // Precomputed values of basis functions and precomputations
        // FE* dimensions: [entities][points][dofs]
        // PI* dimensions: [entities][dofs][dofs] or [entities][dofs]
        // PM* dimensions: [entities][dofs][dofs]
        alignas(32) static const double FE4_C0_D01_Q1[1][1][2] = { { { -1.0, 1.0 } } };
        // Unstructured piecewise computations
        const double J_c0 = coordinate_dofs[0] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[2] * FE4_C0_D01_Q1[0][0][1];
        const double J_c3 = coordinate_dofs[1] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[5] * FE4_C0_D01_Q1[0][0][1];
        const double J_c1 = coordinate_dofs[0] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[4] * FE4_C0_D01_Q1[0][0][1];
        const double J_c2 = coordinate_dofs[1] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[3] * FE4_C0_D01_Q1[0][0][1];
        alignas(32) double sp[4];
        sp[0] = J_c0 * J_c3;
        sp[1] = J_c1 * J_c2;
        sp[2] = sp[0] + -1 * sp[1];
        sp[3] = fabs(sp[2]);
        // UFLACS block mode: preintegrated
        A[0] = 0.1666666666666667 * sp[3];
        A[1] = 0.1666666666666667 * sp[3];
        A[2] = 0.1666666666666667 * sp[3];
        }
        """)
        ffibuilder.cdef("""
        void tabulate_tensor_poissonA(double* restrict A, const double* w,
                                    const double* c,
                                    const double* restrict coordinate_dofs,
                                    const int* entity_local_index,
                                    const int* cell_orientation);
        void tabulate_tensor_poissonL(double* restrict A, const double* w,
                                    const double* c,
                                    const double* restrict coordinate_dofs,
                                    const int* entity_local_index,
                                    const int* cell_orientation);
        """)

        ffibuilder.compile(verbose=True)

    mesh.mpi_comm().Barrier()
    from _cffi_kernelA import ffi, lib

    ptrA = ffi.cast("intptr_t", ffi.addressof(lib, "tabulate_tensor_poissonA"))
    integrals = {IntegralType.cell: ([(-1, ptrA)], None)}
    a = cpp.fem.Form([V._cpp_object, V._cpp_object], integrals, [], [], False)

    ptrL = ffi.cast("intptr_t", ffi.addressof(lib, "tabulate_tensor_poissonL"))
    integrals = {IntegralType.cell: ([(-1, ptrL)], None)}
    L = cpp.fem.Form([V._cpp_object], integrals, [], [], False)

    A = dolfinx.fem.assemble_matrix(a)
    A.assemble()
    b = dolfinx.fem.assemble_vector(L)
    b.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)

    Anorm = A.norm(PETSc.NormType.FROBENIUS)
    bnorm = b.norm(PETSc.NormType.N2)
    assert (np.isclose(Anorm, 56.124860801609124))
    assert (np.isclose(bnorm, 0.0739710713711999))

    list_timings(MPI.COMM_WORLD, [TimingType.wall])
def test_cffi_assembly():
    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    V = FunctionSpace(mesh, ("Lagrange", 1))

    if MPI.rank(mesh.mpi_comm()) == 0:
        from cffi import FFI
        ffibuilder = FFI()
        ffibuilder.set_source(
            "_cffi_kernelA", r"""
        #include <math.h>
        #include <stdalign.h>
        void tabulate_tensor_poissonA(double* restrict A, const double* w,
                                    const double* restrict coordinate_dofs,
                                    int cell_orientation)
        {
        // Precomputed values of basis functions and precomputations
        // FE* dimensions: [entities][points][dofs]
        // PI* dimensions: [entities][dofs][dofs] or [entities][dofs]
        // PM* dimensions: [entities][dofs][dofs]
        alignas(32) static const double FE3_C0_D01_Q1[1][1][2] = { { { -1.0, 1.0 } } };
        // Unstructured piecewise computations
        const double J_c0 = coordinate_dofs[0] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[2] * FE3_C0_D01_Q1[0][0][1];
        const double J_c3 = coordinate_dofs[1] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[5] * FE3_C0_D01_Q1[0][0][1];
        const double J_c1 = coordinate_dofs[0] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[4] * FE3_C0_D01_Q1[0][0][1];
        const double J_c2 = coordinate_dofs[1] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[3] * FE3_C0_D01_Q1[0][0][1];
        alignas(32) double sp[20];
        sp[0] = J_c0 * J_c3;
        sp[1] = J_c1 * J_c2;
        sp[2] = sp[0] + -1 * sp[1];
        sp[3] = J_c0 / sp[2];
        sp[4] = -1 * J_c1 / sp[2];
        sp[5] = sp[3] * sp[3];
        sp[6] = sp[3] * sp[4];
        sp[7] = sp[4] * sp[4];
        sp[8] = J_c3 / sp[2];
        sp[9] = -1 * J_c2 / sp[2];
        sp[10] = sp[9] * sp[9];
        sp[11] = sp[8] * sp[9];
        sp[12] = sp[8] * sp[8];
        sp[13] = sp[5] + sp[10];
        sp[14] = sp[6] + sp[11];
        sp[15] = sp[12] + sp[7];
        sp[16] = fabs(sp[2]);
        sp[17] = sp[13] * sp[16];
        sp[18] = sp[14] * sp[16];
        sp[19] = sp[15] * sp[16];
        // UFLACS block mode: preintegrated
        A[0] = 0.5 * sp[19] + 0.5 * sp[18] + 0.5 * sp[18] + 0.5 * sp[17];
        A[1] = -0.5 * sp[19] + -0.5 * sp[18];
        A[2] = -0.5 * sp[18] + -0.5 * sp[17];
        A[3] = -0.5 * sp[19] + -0.5 * sp[18];
        A[4] = 0.5 * sp[19];
        A[5] = 0.5 * sp[18];
        A[6] = -0.5 * sp[18] + -0.5 * sp[17];
        A[7] = 0.5 * sp[18];
        A[8] = 0.5 * sp[17];
        }

        void tabulate_tensor_poissonL(double* restrict A, const double* w,
                                     const double* restrict coordinate_dofs,
                                     int cell_orientation)
        {
        // Precomputed values of basis functions and precomputations
        // FE* dimensions: [entities][points][dofs]
        // PI* dimensions: [entities][dofs][dofs] or [entities][dofs]
        // PM* dimensions: [entities][dofs][dofs]
        alignas(32) static const double FE4_C0_D01_Q1[1][1][2] = { { { -1.0, 1.0 } } };
        // Unstructured piecewise computations
        const double J_c0 = coordinate_dofs[0] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[2] * FE4_C0_D01_Q1[0][0][1];
        const double J_c3 = coordinate_dofs[1] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[5] * FE4_C0_D01_Q1[0][0][1];
        const double J_c1 = coordinate_dofs[0] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[4] * FE4_C0_D01_Q1[0][0][1];
        const double J_c2 = coordinate_dofs[1] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[3] * FE4_C0_D01_Q1[0][0][1];
        alignas(32) double sp[4];
        sp[0] = J_c0 * J_c3;
        sp[1] = J_c1 * J_c2;
        sp[2] = sp[0] + -1 * sp[1];
        sp[3] = fabs(sp[2]);
        // UFLACS block mode: preintegrated
        A[0] = 0.1666666666666667 * sp[3];
        A[1] = 0.1666666666666667 * sp[3];
        A[2] = 0.1666666666666667 * sp[3];
        }
        """)
        ffibuilder.cdef("""
        void tabulate_tensor_poissonA(double* restrict A, const double* w,
                                    const double* restrict coordinate_dofs,
                                    int cell_orientation);
        void tabulate_tensor_poissonL(double* restrict A, const double* w,
                                    const double* restrict coordinate_dofs,
                                    int cell_orientation);
        """)

        ffibuilder.compile(verbose=True)

    MPI.barrier(mesh.mpi_comm())
    from _cffi_kernelA import ffi, lib

    a = cpp.fem.Form([V._cpp_object, V._cpp_object])
    ptrA = ffi.cast("intptr_t", ffi.addressof(lib, "tabulate_tensor_poissonA"))
    a.set_tabulate_cell(0, ptrA)

    L = cpp.fem.Form([V._cpp_object])
    ptrL = ffi.cast("intptr_t", ffi.addressof(lib, "tabulate_tensor_poissonL"))
    L.set_tabulate_cell(0, ptrL)

    A = dolfin.fem.assemble(a)
    b = dolfin.fem.assemble(L)

    Anorm = A.norm(PETSc.NormType.FROBENIUS)
    bnorm = b.norm(PETSc.NormType.N2)
    assert (np.isclose(Anorm, 56.124860801609124))
    assert (np.isclose(bnorm, 0.0739710713711999))

    list_timings([TimingType.wall])
示例#3
0
    for i in range(num_cells):
        kernel(ffi.from_buffer(Aarr), ffi.from_buffer(warr), ffi.from_buffer(constants), ffi.from_buffer(geometry))


run_pure(cexpr.module.tabulate_expression)

pure_ffcx_times = []

for i in range(repeats):
    t0 = time.time()
    run_pure(cexpr.module.tabulate_expression)
    tok = time.time() - t0
    print(f"Pure loop FFCx kernel {tok}")
    pure_ffcx_times.append(tok)

hand_kernel = ffi.cast("void(*)(double *, double *, double *, double *)", ffi.addressof(lib, "tabulate_expression"))

pure_hand_times = []
for i in range(repeats):
    t0 = time.time()
    run_pure(hand_kernel)
    tok = time.time() - t0
    print(f"Pure loop hand kernel {tok}")
    pure_hand_times.append(tok)

cexpr.module.tabulate_expression = hand_kernel

hand_times = []
for i in range(repeats):
    t0 = time.time()
    dolfiny.interpolation.interpolate_cached(cexpr, lam_hand)